1、任务描述
使用C/C++语言编写PL/0编译程序的词法分析程序。 需要注意的点:
(1)识别非法字符:如 @ 、 & 和 ! 等;
(2)识别非法单词:数字开头的数字字母组合;
(3)标识符和无符号整数的长度不超过8位;
(4)能自动识别并忽略/* */及//格式的注释信息;
(5)词法分析过程中遇到错误后能继续往下识别,并输出错误信息。
2、编程要求
完成上述编程任务,将C/C++语言源程序复制粘贴到右侧代码编辑器,点击“评测”按钮,运行程序,系统会自动进行结果对比。
3、测试说明
平台会对你编写的代码进行测试:
测试输入:
const a = 10; var b, c; procedure fun1; if a <= 10 then begin c := b + a; end; begin read(b); while b # 0 do begin call fun1; write(2 * c); read(b); end end.
预期输出:
(保留字,const) (标识符,a) (运算符,=) (无符号整数,10) (界符,;) (保留字,var) (标识符,b) (界符,,) (标识符,c) (界符,;) (保留字,procedure) (标识符,fun1) (界符,;) (保留字,if) (标识符,a) (运算符,<=) (无符号整数,10) (保留字,then) (保留字,begin) (标识符,c) (运算符,:=) (标识符,b) (运算符,+) (标识符,a) (界符,;) (保留字,end) (界符,;) (保留字,begin) (保留字,read) (界符,() (标识符,b) (界符,)) (界符,;) (保留字,while) (标识符,b) (运算符,#) (无符号整数,0) (保留字,do) (保留字,begin) (保留字,call) (标识符,fun1) (界符,;) (保留字,write) (界符,() (无符号整数,2) (运算符,*) (标识符,c) (界符,)) (界符,;) (保留字,read) (界符,() (标识符,b) (界符,)) (界符,;) (保留字,end) (保留字,end) (界符,.)
测试输入(原始输入为多行文本,此处换行已丢失;预期输出中的行号对应原始的多行输入):
const 2a = 123456789; var b, c; //单行注释 /* * 多行注释 */ procedure function1; if 2a <= 10 then begin c := b + a; end; begin read(b); while b @ 0 do begin call function1; write(2 * c); read(b); end end.
预期输出:
(保留字,const) (非法字符(串),2a,行号:1) (运算符,=) (无符号整数越界,123456789,行号:1) (界符,;) (保留字,var) (标识符,b) (界符,,) (标识符,c) (界符,;) (保留字,procedure) (标识符长度超长,function1,行号:10) (界符,;) (保留字,if) (非法字符(串),2a,行号:11) (运算符,<=) (无符号整数,10) (保留字,then) (保留字,begin) (标识符,c) (运算符,:=) (标识符,b) (运算符,+) (标识符,a) (界符,;) (保留字,end) (界符,;) (保留字,begin) (保留字,read) (界符,() (标识符,b) (界符,)) (界符,;) (保留字,while) (标识符,b) (非法字符(串),@,行号:17) (无符号整数,0) (保留字,do) (保留字,begin) (保留字,call) (标识符长度超长,function1,行号:19) (界符,;) (保留字,write) (界符,() (无符号整数,2) (运算符,*) (标识符,c) (界符,)) (界符,;) (保留字,read) (界符,() (标识符,b) (界符,)) (界符,;) (保留字,end) (保留字,end) (界符,.)
4、代码
#include<bits/stdc++.h>
#include <string>
#include <vector>
#include <fstream>
using namespace std;
// Shared lexer tables and state, filled by Init() and used by LexicalAnalysis().
std::vector<std::string> wordTable; // Text of every reported lexeme (tokens and errors), in source order
std::map<std::string, int> B;       // Reserved words (value 1 marks membership)
std::map<std::string, int> Y;       // Complete operators such as "<=" and ":=" (value 1 marks membership)
std::map<char, int> J;              // Delimiter characters (value 1 marks membership)
std::map<char, int> y;              // Characters an operator may be built from (value 1 marks membership)
std::string s1, s2, s3;             // Comment markers: "//", "/*" and "*/"
int F;                              // Non-zero while inside a /* ... */ comment; persists across lines

// Longest identifier / unsigned integer accepted without an error report.
static const std::size_t kMaxLexemeLength = 8;

// Fills the lookup tables and the comment markers. Calling it again is harmless.
void Init() {
    static const char* const kReserved[] = {
        "const", "var", "procedure", "begin", "end", "odd", "if",
        "then", "call", "while", "do", "read", "write"}; // 13 reserved words
    static const char* const kOperators[] = {
        "+", "-", "*", "/", "<", "<=", ">", ">=", "#", "=", ":="}; // 11 operators
    static const char kOperatorChars[] = "+-*/<>=#:";
    static const char kDelimiters[] = "(),;."; // 5 delimiters

    for (std::size_t k = 0; k < sizeof(kReserved) / sizeof(kReserved[0]); ++k)
        B[kReserved[k]] = 1;
    for (std::size_t k = 0; k < sizeof(kOperators) / sizeof(kOperators[0]); ++k)
        Y[kOperators[k]] = 1;
    for (const char* p = kOperatorChars; *p != '\0'; ++p)
        y[*p] = 1;
    for (const char* p = kDelimiters; *p != '\0'; ++p)
        J[*p] = 1;
    s1 = "//";
    s2 = "/*";
    s3 = "*/";
}

// Blank characters that separate tokens. '\r' is included so that CRLF input
// does not produce a bogus illegal-character report at the end of every line.
static bool IsBlank(char c) { return c == ' ' || c == '\t' || c == '\r'; }

static bool IsDigit(char c) { return c >= '0' && c <= '9'; }

static bool IsLetter(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }

// Membership tests that, unlike operator[], never insert the probed key.
static bool InTable(const std::map<char, int>& table, char key) {
    const std::map<char, int>::const_iterator it = table.find(key);
    return it != table.end() && it->second == 1;
}

static bool InTable(const std::map<std::string, int>& table, const std::string& key) {
    const std::map<std::string, int>::const_iterator it = table.find(key);
    return it != table.end() && it->second == 1;
}

// Prints a valid token as "(category,text)" and records its text in wordTable.
static void EmitToken(const char* category, const std::string& text) {
    std::cout << "(" << category << "," << text << ")" << '\n';
    wordTable.push_back(text);
}

// Prints a lexical error as "(category,text,行号:line)" and records the
// offending text in wordTable.
static void EmitError(const char* category, const std::string& text, int line) {
    std::cout << "(" << category << "," << text << ",行号:" << line << ")" << '\n';
    wordTable.push_back(text);
}

// Tokenizes one source line. `line` is the 1-based line number used in error
// reports. Reads and updates F so block comments can span several lines.
static void ScanLine(const std::string& input, int line) {
    const std::size_t length = input.size();
    std::size_t pos = 0;

    while (pos < length) {
        // Inside /* ... */ everything up to the closing marker is skipped.
        if (F) {
            const std::size_t close = input.find(s3, pos);
            if (close == std::string::npos)
                return; // comment continues on the next line
            F = 0;
            pos = close + s3.size();
            continue;
        }

        const char c = input[pos];

        if (IsBlank(c)) {
            ++pos;
            continue;
        }

        // Maximal run of letters and digits: integer, identifier, reserved
        // word, or an illegal digit-led mix such as "2a".
        if (IsDigit(c) || IsLetter(c)) {
            std::size_t end = pos;
            bool hasLetter = false;
            while (end < length && (IsDigit(input[end]) || IsLetter(input[end]))) {
                if (IsLetter(input[end]))
                    hasLetter = true;
                ++end;
            }
            const std::string word = input.substr(pos, end - pos);
            pos = end;

            if (!hasLetter) {
                if (word.size() <= kMaxLexemeLength)
                    EmitToken("无符号整数", word);
                else
                    EmitError("无符号整数越界", word, line);
            } else if (IsDigit(c)) {
                EmitError("非法字符(串)", word, line);
            } else if (InTable(B, word)) {
                EmitToken("保留字", word);
            } else if (word.size() <= kMaxLexemeLength) {
                EmitToken("标识符", word);
            } else {
                EmitError("标识符长度超长", word, line);
            }
            continue;
        }

        // Comment openers are tested before operators so that "/" followed by
        // "/" or "*" is never taken for a division sign.
        if (input.compare(pos, s1.size(), s1) == 0)
            return; // "//": the rest of the line is a comment
        if (input.compare(pos, s2.size(), s2) == 0) {
            F = 1;
            pos += s2.size();
            continue;
        }

        // Operators: prefer a two-character operator (":=", "<=", ">="), then
        // fall back to a single character. Adjacent operators such as "+-"
        // are therefore reported separately instead of as one bogus lexeme.
        if (InTable(y, c)) {
            if (pos + 1 < length) {
                const std::string two = input.substr(pos, 2);
                if (InTable(Y, two)) {
                    EmitToken("运算符", two);
                    pos += 2;
                    continue;
                }
            }
            const std::string one(1, c);
            if (InTable(Y, one))
                EmitToken("运算符", one);
            else
                EmitError("非法字符(串)", one, line); // e.g. ':' without '='
            ++pos;
            continue;
        }

        if (InTable(J, c)) {
            EmitToken("界符", std::string(1, c));
            ++pos;
            continue;
        }

        // Anything else ('@', '&', '!', ...) is an illegal character; scanning
        // resumes with the next character.
        EmitError("非法字符(串)", std::string(1, c), line);
        ++pos;
    }
}

// Lexical analyzer for PL/0 source read line by line from standard input.
//
// Prints one line per lexeme to standard output and appends the lexeme text
// to wordTable:
//   (保留字,w) (标识符,w) (无符号整数,w) (运算符,w) (界符,w)  for valid tokens;
//   (非法字符(串),w,行号:n)   illegal character or digit-led identifier;
//   (无符号整数越界,w,行号:n) unsigned integer longer than 8 digits;
//   (标识符长度超长,w,行号:n) identifier longer than 8 characters.
// Both // and /* */ comments are skipped. Scanning continues after every error.
void LexicalAnalysis() {
    Init();
    F = 0; // every run starts outside any block comment

    std::string input;
    for (int line = 1; std::getline(std::cin, input); line++)
        ScanLine(input, line);
}
// Program entry point: runs the lexical analyzer once and exits with status 0.
int main()
{
    LexicalAnalysis();

    return 0;
}
5、扩展
添加文件输入的功能,程序能够读取指定目录下的源程序字符串txt文件,进行词法分析并将结果输出到指定的文件下。代码如下:
#include<bits/stdc++.h>
#include <string>
#include <vector>
#include <fstream>
using namespace std;
// Shared lexer streams, tables and state.
std::ifstream infile;               // Source text is read from this stream by LexicalAnalysis()
std::ofstream outfile;              // Output file stream
std::vector<std::string> wordTable; // Text of every reported lexeme (tokens and errors), in source order
std::map<std::string, int> B;       // Reserved words (value 1 marks membership)
std::map<std::string, int> Y;       // Complete operators such as "<=" and ":=" (value 1 marks membership)
std::map<char, int> J;              // Delimiter characters (value 1 marks membership)
std::map<char, int> y;              // Characters an operator may be built from (value 1 marks membership)
std::string s1, s2, s3;             // Comment markers: "//", "/*" and "*/"
int F;                              // Non-zero while inside a /* ... */ comment; persists across lines

// Longest identifier / unsigned integer accepted without an error report.
static const std::size_t kMaxLexemeLength = 8;

// Fills the lookup tables and the comment markers. Calling it again is harmless.
void Init() {
    static const char* const kReserved[] = {
        "const", "var", "procedure", "begin", "end", "odd", "if",
        "then", "call", "while", "do", "read", "write"}; // 13 reserved words
    static const char* const kOperators[] = {
        "+", "-", "*", "/", "<", "<=", ">", ">=", "#", "=", ":="}; // 11 operators
    static const char kOperatorChars[] = "+-*/<>=#:";
    static const char kDelimiters[] = "(),;."; // 5 delimiters

    for (std::size_t k = 0; k < sizeof(kReserved) / sizeof(kReserved[0]); ++k)
        B[kReserved[k]] = 1;
    for (std::size_t k = 0; k < sizeof(kOperators) / sizeof(kOperators[0]); ++k)
        Y[kOperators[k]] = 1;
    for (const char* p = kOperatorChars; *p != '\0'; ++p)
        y[*p] = 1;
    for (const char* p = kDelimiters; *p != '\0'; ++p)
        J[*p] = 1;
    s1 = "//";
    s2 = "/*";
    s3 = "*/";
}

// Blank characters that separate tokens. '\r' is included so that CRLF input
// does not produce a bogus illegal-character report at the end of every line.
static bool IsBlank(char c) { return c == ' ' || c == '\t' || c == '\r'; }

static bool IsDigit(char c) { return c >= '0' && c <= '9'; }

static bool IsLetter(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }

// Membership tests that, unlike operator[], never insert the probed key.
static bool InTable(const std::map<char, int>& table, char key) {
    const std::map<char, int>::const_iterator it = table.find(key);
    return it != table.end() && it->second == 1;
}

static bool InTable(const std::map<std::string, int>& table, const std::string& key) {
    const std::map<std::string, int>::const_iterator it = table.find(key);
    return it != table.end() && it->second == 1;
}

// Prints a valid token as "(category,text)" and records its text in wordTable.
static void EmitToken(const char* category, const std::string& text) {
    std::cout << "(" << category << "," << text << ")" << '\n';
    wordTable.push_back(text);
}

// Prints a lexical error as "(category,text,行号:line)" and records the
// offending text in wordTable.
static void EmitError(const char* category, const std::string& text, int line) {
    std::cout << "(" << category << "," << text << ",行号:" << line << ")" << '\n';
    wordTable.push_back(text);
}

// Tokenizes one source line. `line` is the 1-based line number used in error
// reports. Reads and updates F so block comments can span several lines.
static void ScanLine(const std::string& input, int line) {
    const std::size_t length = input.size();
    std::size_t pos = 0;

    while (pos < length) {
        // Inside /* ... */ everything up to the closing marker is skipped.
        if (F) {
            const std::size_t close = input.find(s3, pos);
            if (close == std::string::npos)
                return; // comment continues on the next line
            F = 0;
            pos = close + s3.size();
            continue;
        }

        const char c = input[pos];

        if (IsBlank(c)) {
            ++pos;
            continue;
        }

        // Maximal run of letters and digits: integer, identifier, reserved
        // word, or an illegal digit-led mix such as "2a".
        if (IsDigit(c) || IsLetter(c)) {
            std::size_t end = pos;
            bool hasLetter = false;
            while (end < length && (IsDigit(input[end]) || IsLetter(input[end]))) {
                if (IsLetter(input[end]))
                    hasLetter = true;
                ++end;
            }
            const std::string word = input.substr(pos, end - pos);
            pos = end;

            if (!hasLetter) {
                if (word.size() <= kMaxLexemeLength)
                    EmitToken("无符号整数", word);
                else
                    EmitError("无符号整数越界", word, line);
            } else if (IsDigit(c)) {
                EmitError("非法字符(串)", word, line);
            } else if (InTable(B, word)) {
                EmitToken("保留字", word);
            } else if (word.size() <= kMaxLexemeLength) {
                EmitToken("标识符", word);
            } else {
                EmitError("标识符长度超长", word, line);
            }
            continue;
        }

        // Comment openers are tested before operators so that "/" followed by
        // "/" or "*" is never taken for a division sign.
        if (input.compare(pos, s1.size(), s1) == 0)
            return; // "//": the rest of the line is a comment
        if (input.compare(pos, s2.size(), s2) == 0) {
            F = 1;
            pos += s2.size();
            continue;
        }

        // Operators: prefer a two-character operator (":=", "<=", ">="), then
        // fall back to a single character. Adjacent operators such as "+-"
        // are therefore reported separately instead of as one bogus lexeme.
        if (InTable(y, c)) {
            if (pos + 1 < length) {
                const std::string two = input.substr(pos, 2);
                if (InTable(Y, two)) {
                    EmitToken("运算符", two);
                    pos += 2;
                    continue;
                }
            }
            const std::string one(1, c);
            if (InTable(Y, one))
                EmitToken("运算符", one);
            else
                EmitError("非法字符(串)", one, line); // e.g. ':' without '='
            ++pos;
            continue;
        }

        if (InTable(J, c)) {
            EmitToken("界符", std::string(1, c));
            ++pos;
            continue;
        }

        // Anything else ('@', '&', '!', ...) is an illegal character; scanning
        // resumes with the next character.
        EmitError("非法字符(串)", std::string(1, c), line);
        ++pos;
    }
}

// Lexical analyzer for PL/0 source read line by line from `infile`, which the
// caller must have opened beforehand.
//
// Prints one line per lexeme to standard output and appends the lexeme text
// to wordTable:
//   (保留字,w) (标识符,w) (无符号整数,w) (运算符,w) (界符,w)  for valid tokens;
//   (非法字符(串),w,行号:n)   illegal character or digit-led identifier;
//   (无符号整数越界,w,行号:n) unsigned integer longer than 8 digits;
//   (标识符长度超长,w,行号:n) identifier longer than 8 characters.
// Both // and /* */ comments are skipped. Scanning continues after every error.
void LexicalAnalysis() {
    Init();
    F = 0; // every run starts outside any block comment

    std::string input;
    for (int line = 1; std::getline(infile, input); line++)
        ScanLine(input, line);
}
// Opens `path` for reading on the global `infile` stream.
// Returns 1 on success; prints a message to standard output and returns 0 if
// the file cannot be opened.
int ReadFile(std::string path) {
    // Calling open() on a stream that is already open fails, and error/eof
    // bits from an earlier use would make later reads fail; reset both so the
    // function can be called more than once.
    if (infile.is_open())
        infile.close();
    infile.clear();

    infile.open(path.c_str());
    if (!infile.is_open()) {
        std::cout << "输入文件打开失败!" << std::endl;
        return 0;
    }
    return 1;
}
// Writes every entry of `wordTable` to `path`, one entry per line, using the
// global `outfile` stream.
// Returns 1 when the whole table was written; prints a message to standard
// output and returns 0 if the file cannot be opened or a write fails.
int WriteFile(std::string path) {
    // Reset the shared stream so a second call does not fail on an
    // already-open stream or on stale error bits.
    if (outfile.is_open())
        outfile.close();
    outfile.clear();

    outfile.open(path.c_str());
    if (!outfile.is_open()) {
        std::cout << "输出文件打开失败!" << std::endl;
        return 0;
    }

    // '\n' instead of endl: one flush at close() rather than one per entry.
    for (std::size_t i = 0; i < wordTable.size(); ++i)
        outfile << wordTable[i] << '\n';

    // Close before reporting success so buffered data reaches the file and
    // any write error (e.g. a full disk) is detected.
    outfile.close();
    if (outfile.fail()) {
        std::cout << "输出文件写入失败!" << std::endl;
        return 0;
    }
    return 1;
}
// Program entry point.
//
// Usage: program [source_path [result_path]]
//   source_path  file holding the source text to analyze
//   result_path  file that receives the token table
// When an argument is omitted the built-in default path is used.
// Returns 0 on success and 1 if the source file cannot be opened or the
// result file cannot be written.
int main(int argc, char* argv[]) {
    // Path of the source text to analyze
    const std::string source_path = (argc > 1) ? argv[1] : "E:\\...\\source.txt";
    // Path of the file that receives the result
    const std::string result_path = (argc > 2) ? argv[2] : "E:\\...\\result.txt";

    if (!ReadFile(source_path))
        return 1;

    LexicalAnalysis();

    if (!WriteFile(result_path))
        return 1;

    std::cout << "\n单词表已保存到文件中" << std::endl;
    return 0;
}
指定的文件如下:
运行结果如下:
读取源程序文件:
写入结果文件: