【C++】编译原理+自动机理论实现词法分析器
- 实验目的:
- 熟悉词法分析阶段的要求
- 掌握利用自动机理论实现词法分析器的方法。
- 实验设备:
- 硬件:PC 机一台;
- 软件:Windows系统;高级语言集成开发环境。
- 实验内容:
- 根据词法要求采用自动机理论实现词法分析器。
- 实现语言不可用脚本类语言,推荐使用C语言;
- 不允许使用任何语言的正规式控件实现实验要求。
- 实验要求及步骤
-
理解以下词法表述和状态图表示.
-
将以上状态图转换为DFA的状态转换矩阵,写出该矩阵。
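状态转换矩阵(带 * 的项表示转移时回退当前字符,即该字符不并入当前单词;“空”列表示没有输入字符时的转移):

| 状态 | 字母 | 数字 | = | + | * | , | ( | ) | 其他 | 空 |
|------|------|------|----|----|----|----|----|----|------|----|
| 0 | 1 | 3 | 5 | 6 | 7 | 10 | 11 | 12 | 13 | |
| 1 | 1 | 1 | *2 | *2 | *2 | *2 | *2 | *2 | *2 | |
| 3 | *4 | 3 | *4 | *4 | *4 | *4 | *4 | *4 | *4 | |
| 7 | *8 | *8 | *8 | *8 | 9 | *8 | *8 | *8 | *8 | |
| 2 | | | | | | | | | | 0 |
| 4 | | | | | | | | | | 0 |
| 5 | | | | | | | | | | 0 |
| 6 | | | | | | | | | | 0 |
| 8 | | | | | | | | | | 0 |
| 9 | | | | | | | | | | 0 |
| 10 | | | | | | | | | | 0 |
| 11 | | | | | | | | | | 0 |
| 12 | | | | | | | | | | 0 |
| 13 | | | | | | | | | | 0 |

-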
根据DFA的状态转换矩阵实现词法分析器。
exp2.h
#include<bits/stdc++.h>
using namespace std;
// DFA transition table: states[row][column] is the next state. Defined in const_num.cpp.
extern const vector<vector<int>> states;
// Retract table with the same shape as `states`: when an entry is true, solve()
// steps its index back so the current character is read again.
extern const vector<vector<bool>>backs;
// Reads one line from standard input into input_str.
void input();
// Splits input_str into lexemes and pushes each one onto output_queue.
void solve();
// Prints one line per queued lexeme and empties output_queue.
void output();
// One entry of the token table: a token class together with the value to print for it.
struct node {
    string word;    // spelling of the token, or the name of its category
    int num;        // numeric class code
    string bianma;  // symbolic code printed in the first output column
    string zhi;     // value printed in the second output column

    // Stores a, b and c in word, num and bianma; zhi starts out empty.
    node(string a, int b, string c);

    // Writes bianma and zhi to standard output on one line.
    void print();
};
// Token table; output() indexes it with the code returned by get_type_by_word().
extern vector<node>v;
// The line most recently read by input().
extern string input_str;
// Lexemes produced by solve() and consumed by output().
extern queue<string> output_queue;
const_num.cpp
#include "exp2.h"
// Token table. The position of each entry equals its class code, so the table
// can be indexed directly with that code; entry 0 is an unused placeholder.
vector<node> v = {
    {"", 0, ""},
    // Keywords.
    {"DIM", 1, "$DIM"},
    {"IF", 2, "$IF"},
    {"DO", 3, "$DO"},
    {"STOP", 4, "$STOP"},
    {"END", 5, "$END"},
    // Identifiers and integer constants.
    {"标识符", 6, "$ID"},
    {"常数", 7, "$INT"},
    // Operators and delimiters.
    {"=", 8, "$ASSIGN"},
    {"+", 9, "$PLUS"},
    {"*", 10, "$STAR"},
    {"**", 11, "$POWER"},
    {",", 12, "$COMMA"},
    {"(", 13, "$LPAR"},
    {")", 14, "$RPAR"},
    // Anything that matches none of the above.
    {"其他", 15, "$ERR"},
};
// Retract table, parallel to `states`: backs[row][column] is true when the
// character that triggered the transition must NOT be consumed (the starred
// entries of the transition matrix), so that it is read again from state 0.
//
// Rows:    0 -> state 0, 1 -> state 1, 2 -> state 3, 3 -> state 7.
// Columns: letter, digit, '=', '+', '*', ',', '(', ')', other.
//
// In the last row the '*' column is false: state 7 reaching state 9 on a
// second '*' consumes that character, which is what makes "**" one lexeme.
extern const vector<vector<bool>> backs = {
    // letter digit  '='    '+'    '*'    ','    '('    ')'    other
    {false, false, false, false, false, false, false, false, false},  // state 0
    {false, false, true,  true,  true,  true,  true,  true,  true},   // state 1
    {true,  false, true,  true,  true,  true,  true,  true,  true},   // state 3
    {true,  true,  true,  true,  false, true,  true,  true,  true},   // state 7
};
// DFA transition table: states[row][column] is the next state.
// Rows are the four states that still read input; state_transition() maps
// state 0 -> row 0, state 1 -> row 1, state 3 -> row 2 and state 7 -> row 3.
// Column order follows the transition matrix in the lab description:
// letter, digit, '=', '+', '*', ',', '(', ')', other.
extern const vector<vector<int>> states = {
{1, 3, 5, 6, 7, 10, 11, 12, 13}, // state 0
{1, 1, 2, 2, 2, 2, 2, 2, 2}, // state 1
{4, 3, 4, 4, 4, 4, 4, 4, 4}, // state 3
{8, 8, 8, 8, 9, 8, 8, 8, 8}, // state 7
};
main.cpp
#include "exp2.h"
// Program entry point: read one line, split it into lexemes, print the listing.
int main()
{
    input();
    solve();
    output();
    return 0;
}
node.cpp
#include "exp2.h"
// Builds a token-table entry from its spelling (a), class code (b) and
// symbolic code (c). The printed value starts out empty.
node::node(string a, int b, string c)
    : word(a), num(b), bianma(c), zhi("")
{
}
// Writes the symbolic code, two tab characters, the value and a line break
// to standard output.
void node::print()
{
    std::cout << bianma;
    std::cout << "\t\t";
    std::cout << zhi;
    std::cout << std::endl;
}
state.cpp
#include "exp2.h"
// Maps an input character to its column in the transition tables.
//
// Returns: 0 letter, 1 digit, 2 '=', 3 '+', 4 '*', 5 ',', 6 '(', 7 ')',
//          8 any other character.
//
// ')' has its own column (7); everything unrecognised goes to the separate
// "other" column (8) instead of sharing the ')' column.
int get_index(char input_char)
{
    // The <cctype> classifiers require a value representable as unsigned char;
    // passing a negative char (e.g. a byte of a non-ASCII character) is
    // undefined behaviour.
    const unsigned char uc = static_cast<unsigned char>(input_char);
    if (isalpha(uc))
        return 0;
    if (isdigit(uc))
        return 1;

    switch (input_char)
    {
    case '=': return 2;
    case '+': return 3;
    case '*': return 4;
    case ',': return 5;
    case '(': return 6;
    case ')': return 7;
    default:  return 8;
    }
}
pair<int, bool> state_transition(int current_state, char input_char) {
int next_state;
bool back = false;
if (input_char == NULL)
{
switch (current_state)
{
case 2:
case 4:
case 8:
back = true;
case 5:
case 6:
case 9:
case 10:
case 11:
case 12:
case 13:
next_state = 0;
break;
default:
next_state = current_state;
break;
}
}
else
{
if (current_state == 0 || current_state == 1 || current_state == 3 || current_state == 7)
{
int index = get_index(input_char);
if (current_state == 3)
current_state = 2;
if (current_state == 7)
current_state = 3;
next_state = states[current_state][index];
back = backs[current_state][index];
}
}
return { next_state, back };
}
// Splits input_str into lexemes and pushes each one onto output_queue.
//
// For every character the automaton takes one real step followed by one
// "no input" step ('\0'). Whenever the automaton is back in state 0 after
// that, the text collected so far is a complete lexeme and is queued. Text
// still pending when the input ends is queued as a final lexeme.
void solve() {
    int current_state = 0;
    string str;
    // Signed length: the index is decremented on a retract, so it stays an int.
    const int length = static_cast<int>(input_str.size());

    for (int i = 0; i < length; i++)
    {
        const char input_char = input_str[i];
        const pair<int, bool> step = state_transition(current_state, input_char);

        if (step.second)
            i--;                 // retract: read this character again next round
        else
            str += input_char;   // the character belongs to the current lexeme

        // Let accepting states fall back to state 0.
        current_state = state_transition(step.first, '\0').first;

        if (current_state == 0)
        {
            output_queue.push(str);
            str.clear();
        }
    }

    if (!str.empty())
        output_queue.push(str);
}
utils.cpp
#include "exp2.h"
string input_str;
queue<string> output_queue;
// Prompts on standard output, then stores one full line of standard input
// in input_str.
void input()
{
    std::cout << "请输入一个表达式:";
    std::getline(std::cin, input_str);
}
// Returns the class code of a lexeme:
//   1-5   the keywords DIM, IF, DO, STOP, END
//   6     any other lexeme starting with a letter
//   7     a lexeme starting with a digit
//   8-14  the operators and delimiters = + * ** , ( )
//   15    everything else, including an empty lexeme
int get_type_by_word(const string& word) {
    // Exact spellings are checked first so that keywords win over the generic
    // "starts with a letter" rule below.
    static const pair<const char*, int> fixed_tokens[] = {
        {"DIM", 1}, {"IF", 2}, {"DO", 3}, {"STOP", 4}, {"END", 5},
        {"=", 8},   {"+", 9},  {"*", 10}, {"**", 11},  {",", 12},
        {"(", 13},  {")", 14},
    };
    for (const auto& token : fixed_tokens)
    {
        if (word == token.first)
            return token.second;
    }

    if (word.empty())
        return 15;

    // The <cctype> classifiers require a value representable as unsigned char;
    // a negative char (a byte of a non-ASCII character) is undefined behaviour.
    const unsigned char first = static_cast<unsigned char>(word[0]);
    if (isalpha(first))
        return 6;
    if (isdigit(first))
        return 7;
    return 15;
}
// Prints a 30-character rule and a header line, then one line per queued
// lexeme, emptying output_queue as it goes. Only identifiers (code 6) and
// constants (code 7) carry their lexeme text in the value column.
void output()
{
    cout << string(30, '=') << endl;
    cout << "编码" << "\t\t" << "值" << endl;

    while (!output_queue.empty())
    {
        const string& lexeme = output_queue.front();
        const int index = get_type_by_word(lexeme);

        node a = v[index];  // copy, so the shared table entry is not modified
        if (index == 6 || index == 7)
            a.zhi = lexeme;
        a.print();

        output_queue.pop();
    }
}
-
自设10对如下所示输入输出对,展示词法分析器运行结果。
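输入语句任意自设,可包含错误单词($ERR类);若含错误单词,则词法分析器运行需报告错误,错误信息含第一个错误单词位置。

| 序号 | 输入 | 输出 |
|------|------|------|
| 1 | `IF(X)=5` | |
| 2 | `IF(55X)=X55` | |
| 3 | `DO(I=I+1)` | |
| 4 | `5+2=7` | |
| 5 | `X**2=10` | |
| 6 | `(EE+END)**2 = 8XX` | |
| 7 | `A!=1` | |
| 8 | `#INCLUDE<BITS/STDC++.H>` | |
| 9 | `IF(A=1)B=2` | |
| 10 | `666` | |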
-
从代码量、时间复杂度、空间复杂度三方面,分析对比实验一与实验二两种词法分析器。
| | 实验1 | 实验2 |
|------|------|------|
| 代码量 | 150行,包含1个文件:main.cpp(实验一没分文件) | 223行,包含6个文件:exp2.h, const_num.cpp, main.cpp, node.cpp, state.cpp, utils.cpp |
| 时间复杂度 | 时间复杂度取决于输入的字符串的长度n以及正则表达式的复杂度。假设正则表达式的复杂度为m,则词法分析器的时间复杂度可以表示为O(nm)。 | 状态转换图的大小已经确定,时间复杂度是O(n) |
| 空间复杂度 | 空间复杂度取决于状态转换图的大小,也就是NFA的大小。假设状态转换图的大小为p,则词法分析器的空间复杂度可以表示为O(p)。 | 状态转换图的大小已经确定,空间复杂度是O(n)(主要用于存储输入串和输出队列) |