编译原理实验：语法分析器

「已注销」

已于 2023-05-15 17:10:59 修改

阅读量4.3k

点赞数 6

文章标签： c++

于 2023-05-13 22:42:53 首次发布

本文链接：https://blog.csdn.net/qq_26956653/article/details/130663458

版权

实验二语法分析

一、实验目的

采用自上而下的方法实现PL/0语言程序的语法分析器，以加深对自上而下语法分析过程的理解。

二、实验要求

1、要求每位同学单独完成语法分析器，并接受检查；

2、撰写实验报告。

（1）对示例PL/0程序进行语法分析后的输出结果；

（2）对示例PL/0程序至少三次不同位置的改动，引入语法错误，给出改动情况和语法分析后的报错的对应；

（3）用流程图或伪代码的形式描述典型子程序的结构（可选）；

（4）书写设计和实现的过程中出现的一些问题以及解决方法（可选）。

三、实验内容

1、PL/0文法：

在词法规则基础上，引入一组非终结符和产生式集合构成PL/0语言的上下文无关文法。

VN = { program, block, statement, condition, expression, term, factor }

其中：program为开始符号；

VT = { ident, number, "const", "var", "procedure", "call", "begin", "end", "if", "then", "while", "do", "odd", ".", ",", "=", ";", ":=", "#", "<", ">", "+", "-", "*", "/", "(", ")" }

其中：ident代表标识符，number代表数值，双引号括起来的符号是源程序中出现的原始字符串（包括关键字、算符等），每个对应一个单词种别。

2、实验步骤

（1）按照EBNF文法设计语法分析程序的整体结构；

（2）针对每个语法单位编写相应的子程序，完成递归下降分析程序。

3、程序功能要求

（1）程序的输入：可以读取标准输入，或者打开指定的源程序文件。

（2）复用实验一的词法分析器代码，词法分析得到的词法单位对应文法中的终结符。

（3）对语法正确的示例程序，分析后给出“语法正确”的提示。如果分析过程中遇到错误不需要尝试恢复分析，停止该次分析过程即可，但应尽量给出说明性较强的错误提示。

以下为待分析代码

// PL/0 demo

(*
	This is a multi-line 
	comment
*)

const limit = 10;
var n, f, test, t1, t2;
begin
     n := 0;
     f := 1;
     while n # limit do
     begin
          n := n + 1;
          f := f * n;
     end;
     test := 1 + 2 - 3 * 4 / ( 5 - 6 ) - 7;
	t1 := test * 2;
	t2 := t1 + test;
     call print;	// print all var
end.

以下为语法分析器实现代码

# include<iostream>
# include<string>
# include<fstream>
# include<sstream>
# include<vector>
# include<map>
#include <iostream>
#include <algorithm>
#include <cstdlib>
using namespace std;

//词法分析器
//以下是我对单词种别划分
/*
    关键字：
    begin       0
    end         1
    if          2
    then        3
    while       4
    do          5
    const       6
    var         7
    call        8
    procedure   9
    odd         10

    运算符：
    +           11
    -           12
    *           13
    /           14
    =           15
    #           16
    <           17
    >           18
    :=          19
    (           20
    )           21
    ,           22
    .           23
    ;           24

    标识符：    25
    常数：      26
*/

// Forward declarations for the lexer helpers
bool isIdentifier(string s);// true when s is an identifier (and not a keyword)
bool isKeywords(string s);  // true when s is a PL/0 keyword
bool isDigit(string s);     // true when s is an unsigned integer literal
bool isOperator(string s);  // true when s is an operator spelled as a string (includes ":=")
bool isOperator(char c);    // true when c is a single operator character
string result(string s);    // builds the printable classification text for lexeme s

//函数实现
// Returns true when s is a PL/0 identifier: not a keyword, first character an
// ASCII letter, every following character an ASCII letter or decimal digit.
// The empty string is not an identifier.
bool isIdentifier(string s)
{
    // Keywords are reserved and can never be identifiers.
    if (isKeywords(s))
        return false;

    auto isLetter = [](char ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    };
    auto isDecimal = [](char ch) {
        return ch >= '0' && ch <= '9';
    };

    // The leading character must be a letter.
    if (s.empty() || !isLetter(s[0]))
        return false;

    // Everything after it may be a letter or a digit.
    for (size_t pos = 1; pos < s.length(); ++pos)
    {
        if (!isLetter(s[pos]) && !isDecimal(s[pos]))
            return false;
    }
    return true;
}

// Returns true when s is exactly one of the PL/0 reserved words
// (the comparison is case-sensitive).
bool isKeywords(string s)
{
    // Reserved words of PL/0.
    static const char* const kReserved[] = {
        "begin", "end", "if", "then", "while", "do",
        "const", "var", "call", "procedure", "odd"
    };

    for (const char* word : kReserved)
    {
        if (s == word)
            return true;
    }
    return false;
}

// Returns true when s is a non-empty string made only of the characters '0'..'9'.
bool isDigit(string s)
{
    if (s.empty())
        return false;

    // Reject on the first character that is not a decimal digit.
    for (char ch : s)
    {
        if (ch < '0' || ch > '9')
            return false;
    }
    return true;
}

// Returns true when s is exactly one PL/0 operator or delimiter, including
// the two-character assignment operator ":=". A lone ":" is not accepted here.
bool isOperator(string s)
{
    // Operators and delimiters of PL/0.
    static const char* const kOperators[] = {
        "+", "-", "*", "/", "=", "#", "<", ">",
        ":=", "(", ")", ",", ".", ";"
    };

    for (const char* candidate : kOperators)
    {
        if (s == candidate)
            return true;
    }
    return false;
}

// Returns true when c is a character that can start or form a PL/0 operator.
// ':' is accepted on its own because it is the first half of ":=".
bool isOperator(char c)
{
    switch (c)
    {
    case '+': case '-': case '*': case '/':
    case '=': case '#': case '<': case '>':
    case '(': case ')': case ',': case '.':
    case ';': case ':':
        return true;
    default:
        return false;
    }
}

// Builds the printable classification of one lexeme.
//
// s: the lexeme text.
// Returns "(<category>--<code>,<lexeme>)" for identifiers (code 25), integer
// literals (code 26), keywords (codes 0-10) and operators (codes 11-24), or
// "Error" when s belongs to none of these categories.
string result(string s)
{
    // 1. Identifier
    if (isIdentifier(s))
        return "(标识符--25," + s + ")";

    // 2. Integer literal
    if (isDigit(s))
        return "(整型常量--26," + s + ")";

    // Category codes for keywords and operators. Built once on first use
    // rather than being re-assigned entry by entry on every call.
    static const map<string, string> kCodes = {
        // keywords
        { "begin", "0" }, { "end", "1" }, { "if", "2" }, { "then", "3" },
        { "while", "4" }, { "do", "5" }, { "const", "6" }, { "var", "7" },
        { "call", "8" }, { "procedure", "9" }, { "odd", "10" },
        // operators and delimiters
        { "+", "11" }, { "-", "12" }, { "*", "13" }, { "/", "14" },
        { "=", "15" }, { "#", "16" }, { "<", "17" }, { ">", "18" },
        { ":=", "19" }, { "(", "20" }, { ")", "21" }, { ",", "22" },
        { ".", "23" }, { ";", "24" }
    };

    // 3. Keyword (every keyword accepted by isKeywords has an entry above)
    if (isKeywords(s))
        return "(关键字--" + kCodes.at(s) + "," + s + ")";

    // 4. Operator (every operator accepted by isOperator(string) has an entry above)
    if (isOperator(s))
        return "(运算符--" + kCodes.at(s) + "," + s + ")";

    return "Error";
}

// Splits one whitespace-delimited word into PL/0 lexemes, left to right:
// the two-character operator ":=", any single operator character, or a
// maximal run of non-operator characters (identifier, keyword, number or
// malformed text). Every character of the word ends up in exactly one lexeme.
static vector<string> splitIntoLexemes(const string& word)
{
    vector<string> pieces;
    size_t pos = 0;
    while (pos < word.size())
    {
        if (word.compare(pos, 2, ":=") == 0)
        {
            // ":=" is the only two-character operator of this PL/0 subset.
            pieces.push_back(":=");
            pos += 2;
        }
        else if (isOperator(word[pos]))
        {
            pieces.push_back(string(1, word[pos]));
            ++pos;
        }
        else
        {
            // Extend the run up to the next operator character or the end of the word.
            size_t stop = pos;
            while (stop < word.size() && !isOperator(word[stop]))
                ++stop;
            pieces.push_back(word.substr(pos, stop - pos));
            pos = stop;
        }
    }
    return pieces;
}

// Performs lexical analysis of TestData1.txt and records the lexemes for the
// syntax analysis that follows.
//
// Side effects: echoes the source text and one classification line per lexeme
// to standard output, and appends the same report to Result.txt.
// Returns the lexeme texts in source order; the list is empty when the source
// file cannot be opened.
//
// Comment handling: a word starting with "//" discards the rest of its line;
// a word starting with "(*" opens a block comment that runs up to the first
// line containing "*)" (which may be the same line).
vector<string> LexicalAnalysis()
{
    const string file = "TestData1.txt";
    ifstream input(file);   // the file's encoding must match the project's, otherwise the echo is garbled
    if (!input)
    {
        cerr << "无法打开源文件: " << file << endl;
        return vector<string>();
    }

    // Read the whole file once; it is both echoed and tokenized from memory.
    string copy;
    getline(input, copy, '\0');
    input.close();
    cout << copy << endl;

    // The report lists the original text first, then the classification result.
    ofstream output("Result.txt", ofstream::app);
    output << "原数据:\n";
    output << copy << endl;
    output << "处理后结果:\n";
    cout << "处理后结果:\n";

    vector<string> AllWords;    // every recognised lexeme, in source order
    bool skip = false;          // true while inside a multi-line (* ... *) comment

    istringstream lines(copy);
    string str;
    while (getline(lines, str))
    {
        if (skip)
        {
            // The line that contains "*)" closes the comment and is dropped as a whole.
            if (str.find("*)") != string::npos)
                skip = false;
            continue;
        }

        istringstream strCin(str);
        string words;
        while (strCin >> words)
        {
            // Line comment: ignore the rest of this line.
            if (words.compare(0, 2, "//") == 0)
                break;

            // Block comment: ignore the rest of this line, and keep skipping
            // on the following lines unless the comment closes on this line.
            if (words.compare(0, 2, "(*") == 0)
            {
                const size_t open = str.find("(*");
                skip = (str.find("*)", open + 2) == string::npos);
                break;
            }

            // Scan the complete word first, then emit each lexeme exactly once.
            for (const string& lexeme : splitIntoLexemes(words))
            {
                const string rel = result(lexeme);
                AllWords.push_back(lexeme);
                cout << rel << endl;
                output << rel << endl;
            }
        }
    }
    return AllWords;
}
//词法分析器



//语法分析器
// Token stream and parser state.
// NOTE(review): the global name `index` may collide with a C library function
// of the same name on some toolchains - confirm before porting.
vector<string> tokens = LexicalAnalysis();  // filled by the lexer during static initialization, before main() runs
int index = 0;                              // position of the next unread token in `tokens`
bool success = true;                        // cleared by exception_print when an error is reported
string symbolarray[] = { "=", "#", "<", ">" };  // relational operators accepted inside a condition
// Derived from the array so the count cannot drift from the initializer list.
int symbollength = static_cast<int>(sizeof(symbolarray) / sizeof(symbolarray[0]));

//对实验文档中的消除左递归和回溯的PL/0的EBNF文法分析
/*
    program：程序，由 block 和 "." 组成。
    block：代码块，包含 constdecl、vardecl、procdecl 和 statement 四部分。
    constdecl：常量声明，由可选的 "const" 关键字，多个 constitem，以及 ";" 组成。
    constitem：常量条目，包括标识符 ident 和数字 number 之间的 "=" 号连接。
    vardecl：变量声明，由可选的 "var" 关键字，多个标识符 ident，以及 ";" 组成。
    procdecl：过程声明，由多个 "procedure" 关键字，标识符 ident，代码块 block 和 ";" 组成。
    statement：语句，包括赋值语句 assignstmt、过程调用语句 callstmt、复合语句 compstmt、条件语句 ifstmt 和循环语句 whilestmt。
    assignstmt：赋值语句，包括标识符 ident，":=" 连接后面的 expression。
    callstmt：过程调用语句，包括 "call" 关键字和标识符 ident。
    compstmt：复合语句，由 "begin" 关键字和多个语句 statement，以及 "end" 关键字组成。
    ifstmt：条件语句，由 "if" 关键字、condition、"then" 关键字和语句 statement 组成。
    whilestmt：循环语句，由 "while" 关键字、condition、"do" 关键字和语句 statement 组成。
    condition：条件判断，形式为 "odd" expression，或者两个 expression 之间用关系运算符（"="、"#"、"<"、">"）连接。
    expression：表达式，由 term 和多个加减法运算符组成。
    term：项，由 factor 和多个乘除法运算符组成。
    factor：因子，包括可选的正负号、标识符 ident、数字 number 和括号中的 expression。
*/

// Forward declarations for the parser; each comment gives the grammar rule the function handles
void exception_print(int type, string expected);    // print an error message and stop
void match(string expected);           // match exactly one expected token
void match(string expected, bool majority);             // match any one of the relational operators
void parse_program();   // program -> block "."
void parse_block();     // block -> constdecl vardecl procdecl statement
void parse_constdecl(); // constdecl -> ["const" constitem {"," constitem} ";"]
void parse_constitem(); // constitem -> ident "=" number
void parse_vardecl();   // vardecl -> ["var" ident {"," ident} ";"]
void parse_procdecl();  // procdecl -> {"procedure" ident ";" block ";"}
void parse_statement(); // statement -> assignstmt | callstmt | compstmt | ifstmt | whilestmt
void parse_assignstmt();// assignstmt -> [ident ":=" expression]
void parse_callstmt();  // callstmt -> ["call" ident]
void parse_compstmt();  // compstmt -> ["begin" statement {";" statement} "end"]
void parse_ifstmt();    // ifstmt -> ["if" condition "then" statement]
void parse_whilestmt(); // whilestmt -> ["while" condition "do" statement]
void parse_condition(); // condition -> "odd" expression | expression ("="|"#"|"<"|">") expression
void parse_expression();// expression -> term {("+"|"-") term}
void parse_term();      // term -> factor {("*"|"/") factor}
void parse_factor();    // factor -> ["+"|"-"] (ident | number | "(" expression ")")
void parse_ident();     // ident -> letter {letter | digit}
void parse_number();    // number -> digit {digit}

//函数实现
// Reports a parse error on standard output and in Result.txt, then terminates
// the program with exit code 100. This function does not return.
//
// type:     0 = syntax error (an expected token was not found),
//           1 = malformed identifier, 2 = malformed integer.
// expected: description of what the parser wanted; only used for type 0.
void exception_print(int type, string expected = "")
{
    // The parser may have run off the end of the token list (truncated input);
    // describe that position instead of reading past the end of `tokens`.
    const bool inRange = index >= 0 && index < static_cast<int>(tokens.size());
    const string found = inRange ? tokens[index] : string("文件结尾");

    string rel = "";
    switch (type)
    {
    case 0: // syntax error
        rel = "语法错误：预期为 " + expected + "，但输入的是 " + found;
        break;
    case 1: // malformed identifier
        rel = "标识符 " + found + " 格式错误";
        break;
    case 2: // malformed integer
        rel = "整数 " + found + "格式错误";
        break;
    }
    ofstream output("Result.txt", ofstream::app);
    cout << rel << endl;
    // endl flushes the file here; exit() below does not run the destructor of this local stream.
    output << rel << endl;
    success = false;
    exit(100);
}

// Consumes the current token when it equals `expected`; otherwise reports a
// syntax error through exception_print. Reaching the end of the token list
// counts as a mismatch instead of reading past the end of `tokens`.
void match(string expected)
{
    const bool available = index < static_cast<int>(tokens.size());
    if (available && tokens[index] == expected)
        index++;
    else
        exception_print(0, expected);
}

// Consumes the current token when it is one of the relational operators listed
// in symbolarray; otherwise reports a syntax error whose "expected" text is
// `expected` followed by the space-separated operator list.
//
// expected: prefix for the error description (callers pass "").
// majority: not read; it only selects this overload.
void match(string expected, bool majority)
{
    // Only inspect the current token when one is left, so truncated input is
    // reported as an error instead of reading past the end of `tokens`.
    if (index < static_cast<int>(tokens.size()))
    {
        for (int i = 0; i < symbollength; i++)
        {
            if (tokens[index] == symbolarray[i])
            {
                index++;
                return;
            }
        }
    }

    // No operator matched: list every accepted operator in the message.
    for (int i = 0; i < symbollength; i++)
    {
        expected += symbolarray[i];
        if (i != symbollength - 1)
            expected += " ";
    }
    exception_print(0, expected);
}

// program -> block "."
// Parses a whole program: one block followed by the terminating ".".
void parse_program()
{
    parse_block();
    match(".");
}

// block -> constdecl vardecl procdecl statement
// Parses the four parts of a block in their fixed order.
void parse_block()
{
    parse_constdecl();
    parse_vardecl();
    parse_procdecl();
    parse_statement();
}

// constdecl -> ["const" constitem {"," constitem} ";"]
// Parses an optional constant declaration. Nothing is consumed when the
// current token is not "const".
void parse_constdecl()
{
    // True when an unread token exists and equals `text`; never reads past the end of `tokens`.
    auto nextIs = [](const char* text) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == text;
    };

    if (!nextIs("const"))
        return;     // the declaration is optional

    match("const");
    parse_constitem();
    while (nextIs(","))
    {
        match(",");
        parse_constitem();
    }
    match(";");
}

// constitem -> ident "=" number
// Parses one constant definition: a name, "=", and an integer literal.
void parse_constitem()
{
    parse_ident();
    match("=");
    parse_number();
}

// vardecl -> ["var" ident {"," ident} ";"]
// Parses an optional variable declaration. Nothing is consumed when the
// current token is not "var".
void parse_vardecl()
{
    // True when an unread token exists and equals `text`; never reads past the end of `tokens`.
    auto nextIs = [](const char* text) {
        return index < static_cast<int>(tokens.size()) && tokens[index] == text;
    };

    if (!nextIs("var"))
        return;     // the declaration is optional

    match("var");
    parse_ident();
    while (nextIs(","))
    {
        match(",");
        parse_ident();
    }
    match(";");
}

// procdecl -> {"procedure" ident ";" block ";"}
// Parses zero or more procedure declarations; each body is a nested block.
void parse_procdecl()
{
    // Stop at the first token that is not "procedure", or at the end of the
    // token list, without reading past the end of `tokens`.
    while (index < static_cast<int>(tokens.size()) && tokens[index] == "procedure")
    {
        match("procedure");
        parse_ident();
        match(";");
        parse_block();
        match(";");
    }
}

// statement -> assignstmt | callstmt | compstmt | ifstmt | whilestmt
// Dispatches on the current token. Every alternative is optional, so a token
// that starts none of them (or the end of the input) is an empty statement
// and nothing is consumed.
void parse_statement()
{
    // At the end of the token list the statement is empty; do not read past the end.
    if (index >= static_cast<int>(tokens.size()))
        return;

    const string& next = tokens[index];
    if (isIdentifier(next))
        parse_assignstmt();
    else if (next == "call")
        parse_callstmt();
    else if (next == "begin")
        parse_compstmt();
    else if (next == "if")
        parse_ifstmt();
    else if (next == "while")
        parse_whilestmt();
}

// assignstmt -> ident ":=" expression
// Parses an assignment; the caller has already checked that the current token is an identifier.
void parse_assignstmt()
{
    parse_ident();
    match(":=");
    parse_expression();
}

// callstmt -> "call" ident
// Parses a procedure call: the keyword "call" followed by the procedure name.
void parse_callstmt()
{
    match("call");
    parse_ident();
}

// compstmt -> "begin" statement {";" statement} "end"
// Parses a compound statement. Because a statement may be empty, a ";"
// directly before "end" is accepted.
void parse_compstmt()
{
    match("begin");
    parse_statement();
    // Continue only while a ";" token is actually available, so input that
    // ends before "end" is reported by match("end") instead of reading past
    // the end of `tokens`.
    while (index < static_cast<int>(tokens.size()) && tokens[index] == ";")
    {
        match(";");
        parse_statement();
    }
    match("end");
}

// ifstmt -> "if" condition "then" statement
// Parses a conditional statement (this grammar has no "else" branch).
void parse_ifstmt()
{
    match("if");
    parse_condition();
    match("then");
    parse_statement();
}

// whilestmt -> "while" condition "do" statement
// Parses a loop statement.
void parse_whilestmt()
{
    match("while");
    parse_condition();
    match("do");
    parse_statement();
}

// condition -> "odd" expression | expression ("="|"#"|"<"|">") expression
// Parses a condition: either the unary "odd" test or a comparison of two expressions.
void parse_condition()
{
    // Check that a token is available before peeking, so truncated input is
    // reported by the callee instead of reading past the end of `tokens`.
    const bool startsWithOdd =
        index < static_cast<int>(tokens.size()) && tokens[index] == "odd";

    if (startsWithOdd)
    {
        match("odd");
        parse_expression();
    }
    else
    {
        parse_expression();
        match("", true);    // any one of the relational operators
        parse_expression();
    }
}

// expression -> term {("+"|"-") term}
// Parses a sequence of terms joined by "+" or "-".
void parse_expression()
{
    // True when an unread token exists and is an additive operator; never
    // reads past the end of `tokens`.
    auto atAdditiveOperator = []() {
        return index < static_cast<int>(tokens.size())
            && (tokens[index] == "+" || tokens[index] == "-");
    };

    parse_term();
    while (atAdditiveOperator())
    {
        ++index;    // consume the operator that was just recognised
        parse_term();
    }
}

// term -> factor {("*"|"/") factor}
// Parses a sequence of factors joined by "*" or "/".
void parse_term()
{
    // True when an unread token exists and is a multiplicative operator;
    // never reads past the end of `tokens`.
    auto atMultiplicativeOperator = []() {
        return index < static_cast<int>(tokens.size())
            && (tokens[index] == "*" || tokens[index] == "/");
    };

    parse_factor();
    while (atMultiplicativeOperator())
    {
        ++index;    // consume the operator that was just recognised
        parse_factor();
    }
}

// factor -> ["+"|"-"] (ident | number | "(" expression ")")
// Parses an optional sign followed by a number, an identifier, or a
// parenthesised expression.
void parse_factor()
{
    // True while at least one unread token is left; guards every peek so the
    // function never reads past the end of `tokens`.
    auto hasToken = []() {
        return index < static_cast<int>(tokens.size());
    };

    // Optional leading sign.
    if (hasToken() && (tokens[index] == "+" || tokens[index] == "-"))
        ++index;

    if (hasToken() && isDigit(tokens[index]))
        parse_number();
    else if (hasToken() && isIdentifier(tokens[index]))
        parse_ident();
    else
    {
        // Anything else (including the end of the input) must be a
        // parenthesised expression; match reports the error otherwise.
        match("(");
        parse_expression();
        match(")");
    }
}

// ident -> letter {letter | digit}
// Consumes the current token when it is a well-formed identifier; otherwise
// reports an identifier error (type 1). The end of the token list is treated
// as an error instead of reading past the end of `tokens`.
void parse_ident()
{
    if (index < static_cast<int>(tokens.size()) && isIdentifier(tokens[index]))
        ++index;    // accept the identifier
    else
        exception_print(1);
}
// number -> digit {digit}
// Consumes the current token when it is an unsigned integer literal; otherwise
// reports an integer error (type 2). The end of the token list is treated as
// an error instead of reading past the end of `tokens`.
void parse_number()
{
    if (index < static_cast<int>(tokens.size()) && isDigit(tokens[index]))
        ++index;    // accept the literal
    else
        exception_print(2);
}

//分析语法，并输出其结果
bool GrammaticalAnalysis()
{
    parse_program();
    ofstream output("Result.txt", ofstream::app);
    if (success)
    {
        cout << "语法正确" << endl;
        output << "语法正确" << endl;
    }
    return success;
}
//语法分析器

// Entry point.
// Runs the syntax analysis, then issues the shell command "pause" before returning 0.
int main()
{
    GrammaticalAnalysis();  // the returned flag is not used here
    system("pause");
    return 0;
}