编译原理实验1《词法分析程序设计与实现》

最新推荐文章于 2022-01-12 11:03:35 发布

Mad-Eye

最新推荐文章于 2022-01-12 11:03:35 发布

阅读量3.5k

点赞数 3

分类专栏：实验日志

本文链接：https://blog.csdn.net/Andy_Luke/article/details/115427519

版权

实验日志专栏收录该内容

6 篇文章 5 订阅

订阅专栏

一、实验目的

加深对词法分析器的工作过程的理解；加强对词法分析方法的掌握；能够采用一种编程语言实现简单的词法分析程序；能够使用自己编写的分析程序对简单的程序段进行词法分析。

二、实验内容

自定义一种程序设计语言，或者选择已有的一种高级语言，编制它的词法分析程序。词法分析程序的实现可以采用任何一种编程语言和编程工具。从输入的源程序中，识别出各个具有独立意义的单词，即关键字（1）、标识符（2）、常数（3）、运算符（4）、界符（5）。并依次输出各个单词的内部编码及单词符号自身值。（遇到错误时可显示“Error”，然后跳过错误部分继续显示）
关键字：if、int、for、while、do、return、break、continue
运算符：+、-、*、/、=、>、<、>=、<=、==、!=
界符：,、;、{、}、(、)

三、实验方法

实验方法：采用c++语言实现，通过编写c++程序模拟实现词法分析器原理。
原理：通过输入一个文件，程序对文件中的内容进行处理，然后输出各个单词的内部编码及单词符号的自身值。

四、实验步骤

根据已有的状态转换图画出流程图：
在这里插入图片描述

五、实验结果

输入数据（文件）：
输入1（test.cpp)¹

int power(int m, int n){
    int result;
    for(int i = 1; i <= n; i=i+1){ //这里i=i+1没有空格是为了判断不同书写格式对词法分析程序是否有影响。
        result = result*m;
    }
}

int main(){
    int a, b, intc;//这里intc是为了检查词法分析程序是否可以做到“最长匹配”。
    a = 10;
    b = a + 10;
    b = power(b);
}

输出1（由于程序太长，我将输出规范化了一下，即每5个单词换一次行）
在这里插入图片描述
输入2(test.java)¹

int a = 0;
for(int i = 0; i < 10; i++)
{
    $这里是为了检查词法分析程序是否可以做到识别错误字符。
    a = ~a + i; 同上
}

输出2
可以看到，程序可以识别出错误的字符。
在这里插入图片描述

六、实验结论

能够分析正常的程序，并且支持“>=”、“<=”等运算符；同时如果存在类似“intf”这样的单词，可以判断出它是一个标识符而不是关键字（最长匹配）。

附源代码

#include<cassert>
#include<cctype>
#include<cstdio>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<string>
#include<vector>

using namespace std;

//is Keyword or not
bool isKeyword(string s) {
    //keyword array
    string keywords[] = { "if", "int", "for", "while", "do", "return", "break", "continue" };
    
    //compare with keywords[]
    for (int i = 0; i < sizeof(keywords); i++) {
        if (s == keywords[i])return true;
    }
    //s does not belong to keywords array
    return false;
}

//is operator or not
bool isOperator(char c) {
    if (c == '+' || c == '-' || c == '*' || c == '/' || c == '=' || c == '>' || c == '<' || c == '!')
        return true;
    else return false;
}

//is separator or not
bool isSeparator(char c) {
    if (c == ',' || c == ';' || c == '{' || c == '}' || c == '(' || c == ')'||c == ' ')
        return true;
    return false;
}

int main() {
    //store single chars from input
    char c;
    //store result tempraraily
    string result;
    //store the results of the lexical analyzer
    string resultArray[300];
    //index of recogized words
    int index = 0;

    //file name of the original program
    string file = "test.java";

    ifstream in;
    in.open(file.data());
    assert(in.is_open());

    //read in a char
    in >> c;

    //read until end of file
    while (!in.eof()) {
        //see if c is letter/'_'
        if (isalpha(c) || c == '_') {
            result.append(1, c);
            in.get(c);
            see if it is keyword
            //if (isKeyword(result)) {
            //  resultArray[index] = "(1, \"" + result + "\")";
            //  index++;
            //  result = "";
            //}

            //form the word
            while (isalpha(c) || isdigit(c) || c == '_') {
                result.append(1, c);
                in.get(c);/*
                if (isKeyword(result)) {
                    resultArray[index++] = "(1, \"" + result + "\")";
                    result = "";
                    continue;
                }*/
            }
            //when encountering separator of operator
            if (isSeparator(c) || isOperator(c)) {
                if (isKeyword(result)) {
                    resultArray[index++] = "(1, \"" + result + "\")";
                    result = "";
                    continue;
                }
                else {
                    resultArray[index++] = "(2, \"" + result + "\")";
                    result = "";
                    continue;
                }
            }
            else {
                //not digit or number or se[aratpr or operator
                result.append(1, c);
                in >> c;
                while (!isSeparator(c) && !isOperator(c)) {
                    result.append(1, c);
                    in >> c;
                }
                resultArray[index++] = "(Error, \"" + result + "\")";
                result = "";
                continue;
            }
        }

        //number
        else if (isdigit(c)) {
            result.append(1, c);
            in >> c;
            //a number
            while (isdigit(c)) {
                result.append(1, c);
                in >> c;
            }

            //separator or operator
            if (isSeparator(c) || isOperator(c)) {
                resultArray[index++] = "(3, \"" + result + "\")";
                result = "";
                continue;
            }

            //others
            else {
                result.append(1, c);
                in >> c;
                while (!isSeparator(c) && !isOperator(c)) {
                    result.append(1, c);
                    in >> c;
                }
                resultArray[index++] = "(Error, \"" + result + "\")";
                result = "";
                continue;
            }
        }

        //operator
        else if (isOperator(c)) {
            result.append(1, c);
            in >> c;
            //if <=, >=, !=, ==
            if (result == "<" || result == ">" || result == "!" || result == "=") {
                if (c == '=') {
                    result.append(1, c);
                    in >> c;
                }
            }
            resultArray[index++] = "(4, \"" + result + "\")";
            result = "";
            continue;
        }

        //separator
        else if (isSeparator(c)) {
            result.append(1, c);
            if (result == " ") {
                //assign c to the first word after blank
                result = "";
                in >> c;
                continue;
            }
            resultArray[index++] = "(5, \"" + result + "\")";
            result = "";
            in >> c;
        }

        //undefined
        else {
            result.append(1, c);
            resultArray[index++] = "(Error, \"" + result + "\")";
            result = "";
            in >> c;
        }
    }
    in.close();

    //output as (class, value)
    for (int i = 0; i < index; i++) {
        if ((i + 1) % 5 == 0)
            cout << resultArray[i] << endl;
        else cout << left << setw(15) << resultArray[i];
    }

    return 0;
}

七、实验小结

遇到的问题：在处理自定义的程序时，当碰到一些关键字时发现会处理成两个字符，一个是关键字本身，另一个是一个空字符，以标识符的形式输出。说明有错误。
解决方法：通过debug发现是由于没有对空格进行处理，而是在每次循环输入一个字符时判断是否是关键字，如果是就直接跳出循环进行下一次。这样就会导致在将result设为空值之后直接进行判断，输出标识符形式的空字符。于是进行改正，在分隔符判断函数中加入一个“ ”（空格），同时把输入流从in>>c改成in.get(c)，这样就能读取到空格字符并进行分割。同时，为了不在输出分隔符时输出空格，在判断分隔符输出时加入一个判断，如果是空格字符就将c的值设为读入的下一个非空字符并进行下一轮循环。
但是这样导致了新的问题：输出的每个运算符前面都会有一个空格。发现是由于判断result为空格时，未还原成空字符串。于是加了置为空字符串的语句，就解决了。
总结：通过实验，编写了能够对普通的C++/Java程序进行词法分析的程序（当然还有很多关键字没有包含），对所学的词法分析器进行了实际操作，获得了有效的操作经验。