编译原理 | 课程设计 — PL/0编译程序词法分析

1、任务描述

使用C/C++语言编写PL/0编译程序的词法分析程序。 需要注意的点:

(1)识别非法字符:如 @ 、 & 和 ! 等;

(2)识别非法单词:数字开头的数字字母组合;

(3)标识符和无符号整数的长度不超过8位;

(4)能自动识别并忽略/* */及//格式的注释信息;

(5)词法分析过程中遇到错误后能继续往下识别,并输出错误信息。

2、编程要求

完成上述编程任务,将C/C++语言源程序复制粘贴到右侧代码编辑器,点击“评测”按钮,运行程序,系统会自动进行结果对比。

3、测试说明

平台会对你编写的代码进行测试:

测试输入:

const a = 10;
var   b, c;
procedure fun1;
    if a <= 10 then
        begin
            c := b + a;
        end;
begin
    read(b);
    while b # 0 do
        begin
            call fun1;
            write(2 * c);
            read(b);
        end
end.

预期输出:

(保留字,const)
(标识符,a)
(运算符,=)
(无符号整数,10)
(界符,;)
(保留字,var)
(标识符,b)
(界符,,)
(标识符,c)
(界符,;)
(保留字,procedure)
(标识符,fun1)
(界符,;)
(保留字,if)
(标识符,a)
(运算符,<=)
(无符号整数,10)
(保留字,then)
(保留字,begin)
(标识符,c)
(运算符,:=)
(标识符,b)
(运算符,+)
(标识符,a)
(界符,;)
(保留字,end)
(界符,;)
(保留字,begin)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,while)
(标识符,b)
(运算符,#)
(无符号整数,0)
(保留字,do)
(保留字,begin)
(保留字,call)
(标识符,fun1)
(界符,;)
(保留字,write)
(界符,()
(无符号整数,2)
(运算符,*)
(标识符,c)
(界符,))
(界符,;)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,end)
(保留字,end)
(界符,.)

测试输入:

const 2a = 123456789;
var   b, c;
//单行注释
/*
* 多行注释
*/
procedure function1;
    if 2a <= 10 then
        begin
            c := b + a;
        end;
begin
    read(b);
    while b @ 0 do
        begin
            call function1;
            write(2 * c);
            read(b);
        end
end.

预期输出:

(保留字,const)
(非法字符(串),2a,行号:1)
(运算符,=)
(无符号整数越界,123456789,行号:1)
(界符,;)
(保留字,var)
(标识符,b)
(界符,,)
(标识符,c)
(界符,;)
(保留字,procedure)
(标识符长度超长,function1,行号:10)
(界符,;)
(保留字,if)
(非法字符(串),2a,行号:11)
(运算符,<=)
(无符号整数,10)
(保留字,then)
(保留字,begin)
(标识符,c)
(运算符,:=)
(标识符,b)
(运算符,+)
(标识符,a)
(界符,;)
(保留字,end)
(界符,;)
(保留字,begin)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,while)
(标识符,b)
(非法字符(串),@,行号:17)
(无符号整数,0)
(保留字,do)
(保留字,begin)
(保留字,call)
(标识符长度超长,function1,行号:19)
(界符,;)
(保留字,write)
(界符,()
(无符号整数,2)
(运算符,*)
(标识符,c)
(界符,))
(界符,;)
(保留字,read)
(界符,()
(标识符,b)
(界符,))
(界符,;)
(保留字,end)
(保留字,end)
(界符,.)

4、代码

#include<bits/stdc++.h>
#include <string>
#include <vector>
#include <fstream>
using namespace std;

// Every lexeme handled by LexicalAnalysis(), in the order it was emitted.
vector<string> wordTable;

// Lookup tables populated by Init(); a mapped value of 1 marks membership.
map<string, int> B;  // reserved words
map<string, int> Y;  // full operator spellings
map<char, int> J;    // delimiter characters
map<char, int> y;    // characters that may occur inside an operator

// Comment markers assigned by Init(): "//", "/*" and "*/" respectively.
string s1;
string s2;
string s3;

// Non-zero while the scanner is inside a /* ... */ comment.
int F = 0;

// Fills the global lookup tables (B, Y, y, J) and the comment markers
// (s1, s2, s3) that the scanner relies on. Safe to call more than once.
void Init() {
    // The 13 reserved words.
    static const char* const kReservedWords[] = {
        "const", "var", "procedure", "begin", "end", "odd", "if",
        "then", "call", "while", "do", "read", "write"};
    for (const char* reserved : kReservedWords)
        B[reserved] = 1;

    // The 11 operators, spelled out in full.
    static const char* const kOperators[] = {
        "+", "-", "*", "/", "<", "<=", ">", ">=", "#", "=", ":="};
    for (const char* op : kOperators)
        Y[op] = 1;

    // Every character that can be part of an operator.
    for (char ch : string("+-*/<>=#:"))
        y[ch] = 1;

    // The 5 delimiters.
    for (char ch : string("(),;."))
        J[ch] = 1;

    s1 = "//";
    s2 = "/*";
    s3 = "*/";
}

// Tokenizes PL/0 source text read line by line from standard input.
//
// For every token one line is printed to standard output:
//   (category,lexeme)            for a valid token
//   (category,lexeme,行号:N)      for an error, N being the 1-based line number
// and the lexeme is appended to wordTable. Scanning always continues after an
// error.
//
// Rules applied:
//   * blanks, tabs and carriage returns separate tokens and are skipped;
//   * "//" discards the rest of the line, "/*" ... "*/" may span lines (the
//     global F is non-zero while such a comment is open);
//   * a run of letters/digits is an unsigned integer (digits only), an illegal
//     word (starts with a digit but contains letters), a reserved word, or an
//     identifier; integers and identifiers longer than 8 characters are errors;
//   * operators use longest match: "<=", ">=", ":=" first, then one character;
//   * anything else, including a lone ':', is reported as an illegal character.
void LexicalAnalysis() {
    Init();

    const size_t kMaxLexemeLength = 8;

    const auto isDigit = [](char c) { return c >= '0' && c <= '9'; };
    const auto isLetter = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    };

    // Prints a valid token and records its lexeme.
    const auto emitToken = [](const char* category, const string& text) {
        cout << '(' << category << ',' << text << ")\n";
        wordTable.push_back(text);
    };
    // Prints an error entry (with line number) and records the offending text.
    const auto emitError = [](const char* category, const string& text, int lineNo) {
        cout << '(' << category << ',' << text << ",行号:" << lineNo << ")\n";
        wordTable.push_back(text);
    };

    string input;
    for (int line = 1; getline(cin, input); line++) {
        const size_t len = input.size();
        size_t i = 0;
        // Every access below is guarded by i < len, so the scan never reads
        // at or beyond the string terminator.
        while (i < len) {
            // Inside a block comment: jump past the closing "*/" if this line
            // has one, otherwise the whole remainder of the line is comment.
            if (F) {
                const size_t closing = input.find("*/", i);
                if (closing == string::npos)
                    break;
                F = 0;
                i = closing + 2;
                continue;
            }

            const char c = input[i];

            if (c == ' ' || c == '\t' || c == '\r') {
                ++i;
                continue;
            }

            // Comment openers are recognised by their two-character prefix, so
            // they work even when glued to further operator characters.
            if (c == '/' && i + 1 < len) {
                if (input[i + 1] == '/')
                    break;  // line comment: ignore the rest of the line
                if (input[i + 1] == '*') {
                    F = 1;
                    i += 2;
                    continue;
                }
            }

            // Letters and digits: number, reserved word, identifier or illegal word.
            if (isDigit(c) || isLetter(c)) {
                size_t stop = i;
                bool hasLetter = false;
                while (stop < len && (isDigit(input[stop]) || isLetter(input[stop]))) {
                    hasLetter = hasLetter || isLetter(input[stop]);
                    ++stop;
                }
                const string word = input.substr(i, stop - i);
                i = stop;

                if (!hasLetter) {
                    if (word.size() <= kMaxLexemeLength)
                        emitToken("无符号整数", word);
                    else
                        emitError("无符号整数越界", word, line);
                } else if (isDigit(c)) {
                    emitError("非法字符(串)", word, line);
                } else {
                    // find() instead of operator[] so probing does not insert
                    // every identifier into the reserved-word table.
                    const auto reserved = B.find(word);
                    if (reserved != B.end() && reserved->second == 1)
                        emitToken("保留字", word);
                    else if (word.size() <= kMaxLexemeLength)
                        emitToken("标识符", word);
                    else
                        emitError("标识符长度超长", word, line);
                }
                continue;
            }

            // Two-character operators take precedence over their one-character prefix.
            if (i + 1 < len && input[i + 1] == '=' && (c == '<' || c == '>' || c == ':')) {
                emitToken("运算符", input.substr(i, 2));
                i += 2;
                continue;
            }

            const string symbol(1, c);
            const auto opEntry = y.find(c);
            const auto delimEntry = J.find(c);
            // ':' is listed in y only as the first half of ":="; alone it is illegal.
            if (c != ':' && opEntry != y.end() && opEntry->second == 1)
                emitToken("运算符", symbol);
            else if (delimEntry != J.end() && delimEntry->second == 1)
                emitToken("界符", symbol);
            else
                emitError("非法字符(串)", symbol, line);
            ++i;
        }
    }
}


// Entry point: runs the lexer over standard input. Falling off the end of
// main is equivalent to returning 0.
int main() {
    LexicalAnalysis();
}

5、扩展

        添加文件输入的功能,程序能够读取指定目录下的源程序字符串txt文件,进行词法分析并将结果输出到指定的文件中。代码如下:

#include<bits/stdc++.h>
#include <string>
#include <vector>
#include <fstream>
using namespace std;

// Streams opened by ReadFile() and WriteFile().
ifstream infile;   // source text to analyse
ofstream outfile;  // destination for the word table

// Every lexeme handled by LexicalAnalysis(), in the order it was emitted.
vector<string> wordTable;

// Lookup tables populated by Init(); a mapped value of 1 marks membership.
map<string, int> B;  // reserved words
map<string, int> Y;  // full operator spellings
map<char, int> J;    // delimiter characters
map<char, int> y;    // characters that may occur inside an operator

// Comment markers assigned by Init(): "//", "/*" and "*/" respectively.
string s1;
string s2;
string s3;

// Non-zero while the scanner is inside a /* ... */ comment.
int F = 0;

// Fills the global lookup tables (B, Y, y, J) and the comment markers
// (s1, s2, s3) that the scanner relies on. Safe to call more than once.
void Init() {
    // The 13 reserved words.
    static const char* const kReservedWords[] = {
        "const", "var", "procedure", "begin", "end", "odd", "if",
        "then", "call", "while", "do", "read", "write"};
    for (const char* reserved : kReservedWords)
        B[reserved] = 1;

    // The 11 operators, spelled out in full.
    static const char* const kOperators[] = {
        "+", "-", "*", "/", "<", "<=", ">", ">=", "#", "=", ":="};
    for (const char* op : kOperators)
        Y[op] = 1;

    // Every character that can be part of an operator.
    for (char ch : string("+-*/<>=#:"))
        y[ch] = 1;

    // The 5 delimiters.
    for (char ch : string("(),;."))
        J[ch] = 1;

    s1 = "//";
    s2 = "/*";
    s3 = "*/";
}

// Tokenizes PL/0 source text read line by line from the global `infile`
// stream (which must already be open).
//
// For every token one line is printed to standard output:
//   (category,lexeme)            for a valid token
//   (category,lexeme,行号:N)      for an error, N being the 1-based line number
// and the lexeme is appended to wordTable. Scanning always continues after an
// error.
//
// Rules applied:
//   * blanks, tabs and carriage returns separate tokens and are skipped;
//   * "//" discards the rest of the line, "/*" ... "*/" may span lines (the
//     global F is non-zero while such a comment is open);
//   * a run of letters/digits is an unsigned integer (digits only), an illegal
//     word (starts with a digit but contains letters), a reserved word, or an
//     identifier; integers and identifiers longer than 8 characters are errors;
//   * operators use longest match: "<=", ">=", ":=" first, then one character;
//   * anything else, including a lone ':', is reported as an illegal character.
void LexicalAnalysis() {
    Init();

    const size_t kMaxLexemeLength = 8;

    const auto isDigit = [](char c) { return c >= '0' && c <= '9'; };
    const auto isLetter = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    };

    // Prints a valid token and records its lexeme.
    const auto emitToken = [](const char* category, const string& text) {
        cout << '(' << category << ',' << text << ")\n";
        wordTable.push_back(text);
    };
    // Prints an error entry (with line number) and records the offending text.
    const auto emitError = [](const char* category, const string& text, int lineNo) {
        cout << '(' << category << ',' << text << ",行号:" << lineNo << ")\n";
        wordTable.push_back(text);
    };

    string input;
    for (int line = 1; getline(infile, input); line++) {
        const size_t len = input.size();
        size_t i = 0;
        // Every access below is guarded by i < len, so the scan never reads
        // at or beyond the string terminator.
        while (i < len) {
            // Inside a block comment: jump past the closing "*/" if this line
            // has one, otherwise the whole remainder of the line is comment.
            if (F) {
                const size_t closing = input.find("*/", i);
                if (closing == string::npos)
                    break;
                F = 0;
                i = closing + 2;
                continue;
            }

            const char c = input[i];

            if (c == ' ' || c == '\t' || c == '\r') {
                ++i;
                continue;
            }

            // Comment openers are recognised by their two-character prefix, so
            // they work even when glued to further operator characters.
            if (c == '/' && i + 1 < len) {
                if (input[i + 1] == '/')
                    break;  // line comment: ignore the rest of the line
                if (input[i + 1] == '*') {
                    F = 1;
                    i += 2;
                    continue;
                }
            }

            // Letters and digits: number, reserved word, identifier or illegal word.
            if (isDigit(c) || isLetter(c)) {
                size_t stop = i;
                bool hasLetter = false;
                while (stop < len && (isDigit(input[stop]) || isLetter(input[stop]))) {
                    hasLetter = hasLetter || isLetter(input[stop]);
                    ++stop;
                }
                const string word = input.substr(i, stop - i);
                i = stop;

                if (!hasLetter) {
                    if (word.size() <= kMaxLexemeLength)
                        emitToken("无符号整数", word);
                    else
                        emitError("无符号整数越界", word, line);
                } else if (isDigit(c)) {
                    emitError("非法字符(串)", word, line);
                } else {
                    // find() instead of operator[] so probing does not insert
                    // every identifier into the reserved-word table.
                    const auto reserved = B.find(word);
                    if (reserved != B.end() && reserved->second == 1)
                        emitToken("保留字", word);
                    else if (word.size() <= kMaxLexemeLength)
                        emitToken("标识符", word);
                    else
                        emitError("标识符长度超长", word, line);
                }
                continue;
            }

            // Two-character operators take precedence over their one-character prefix.
            if (i + 1 < len && input[i + 1] == '=' && (c == '<' || c == '>' || c == ':')) {
                emitToken("运算符", input.substr(i, 2));
                i += 2;
                continue;
            }

            const string symbol(1, c);
            const auto opEntry = y.find(c);
            const auto delimEntry = J.find(c);
            // ':' is listed in y only as the first half of ":="; alone it is illegal.
            if (c != ':' && opEntry != y.end() && opEntry->second == 1)
                emitToken("运算符", symbol);
            else if (delimEntry != J.end() && delimEntry->second == 1)
                emitToken("界符", symbol);
            else
                emitError("非法字符(串)", symbol, line);
            ++i;
        }
    }
}

// Opens `path` on the global input stream `infile`.
// Returns 1 when the stream is open afterwards; otherwise prints a message to
// standard output and returns 0.
int ReadFile(string path) {
    infile.open(path.c_str());
    if (infile.is_open())
        return 1;

    cout << "输入文件打开失败!" << endl;
    return 0;
}

// Opens `path` on the global output stream `outfile` and writes every entry
// of wordTable to it, one per line.
// Returns 1 after writing; if the file cannot be opened, prints a message to
// standard output and returns 0.
int WriteFile(string path) {
    outfile.open(path.c_str());
    if (!outfile.is_open()) {
        cout << "输出文件打开失败!" << endl;
        return 0;
    }

    for (const string& entry : wordTable)
        outfile << entry << endl;
    return 1;
}

// Entry point: analyses the source file and, if that file could be opened,
// saves the collected word table to the result file.
int main() {
    // Location of the source text to analyse.
    const string source_path = "E:\\...\\source.txt";
    // Location the word table is written to.
    const string result_path = "E:\\...\\result.txt";

    if (!ReadFile(source_path))
        return 0;

    LexicalAnalysis();
    if (WriteFile(result_path))
        cout << "\n单词表已保存到文件中" << endl;
    return 0;
}

 指定的文件如下:

  运行结果如下:

 读取源程序文件:

 写入结果文件:

 

  • 2
    点赞
  • 42
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
下面是一个使用 Java 语言编写PL/0 编译程序词法分析程序: ```java import java.io.*; public class Lexer { private BufferedReader reader; private char currentChar; private int currentPos; private Token currentToken; public Lexer(String fileName) throws IOException { reader = new BufferedReader(new FileReader(fileName)); currentPos = 0; nextChar(); } public Token getNextToken() throws IOException { while (currentChar != 0) { if (Character.isWhitespace(currentChar)) { skipWhitespace(); continue; } if (Character.isDigit(currentChar)) { return getNumber(); } switch (currentChar) { case '+': nextChar(); return new Token(TokenType.PLUS, "+"); case '-': nextChar(); return new Token(TokenType.MINUS, "-"); case '*': nextChar(); return new Token(TokenType.TIMES, "*"); case '/': nextChar(); return new Token(TokenType.SLASH, "/"); case '(': nextChar(); return new Token(TokenType.LPAREN, "("); case ')': nextChar(); return new Token(TokenType.RPAREN, ")"); case ';': nextChar(); return new Token(TokenType.SEMI, ";"); case '.': nextChar(); return new Token(TokenType.PERIOD, "."); case ':': nextChar(); if (currentChar == '=') { nextChar(); return new Token(TokenType.ASSIGN, ":="); } else { return new Token(TokenType.COLON, ":"); } case ',': nextChar(); return new Token(TokenType.COMMA, ","); default: throw new IOException("Invalid character: " + currentChar); } } return new Token(TokenType.EOF, ""); } private void nextChar() throws IOException { currentPos++; int next = reader.read(); currentChar = (char) next; } private void skipWhitespace() throws IOException { while (Character.isWhitespace(currentChar)) { nextChar(); } } private Token getNumber() throws IOException { StringBuilder sb = new StringBuilder(); do { sb.append(currentChar); nextChar(); } while (Character.isDigit(currentChar)); return new Token(TokenType.NUMBER, sb.toString()); } } ``` 在这个词法分析程序中,我们通过 `BufferedReader` 对象读取输入文件,并通过 `nextChar` 方法逐个读取字符。通过 `getNextToken` 
方法,我们不断循环,直到遇到文件结尾或者识别到一个词法单元。在循环中,我们首先判断当前字符是否为空白字符,如果是,就跳过它。接着,我们判断当前字符是否为数字,如果是,就调用 `getNumber` 方法来获取一个数字词法单元。如果当前字符是其他符号,则根据其类型返回相应的词法单元。如果当前字符是一个非法字符,则抛出异常。最后,如果整个文件已经读取完毕,则返回一个 EOF 词法单元。 在 `getNumber` 方法中,我们使用 `StringBuilder` 来动态构建数字字符串,直到读取到一个非数字字符为止。最后,我们返回一个 NUMBER 词法单元,其中保存了数字字符串。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值