刚做完正则匹配的题目,现在顺便也把这道编译原理上实现的词法分析程序分享一下吧,虽然挺简单的。
题意:如上图所示,说得很清楚:分析输入字符串中的每个单词,输出它属于哪一类(关键字、运算符、分界符、标识符、无符号整数)以及它在该类中的序号;标识符和无符号整数不是预先给定的,而是在首次出现时加入对应的表中。
分析与思路:首先,单个字符的元素比较容易判断;而关键字、标识符、数字这三类由多个字符组成,处理起来就麻烦一些。我的思路是:这三类单词内部不可能含有分界符或运算符,所以我把分界符、运算符、空格和 '#' 统称为分隔符。扫描时把遇到分隔符之前的字符收集起来组成一个字符串,先判断它是否为数字(首字符是数字),再判断它是否为关键字,都不是的话就是标识符。
代码:
#include <cstdlib>
#include<iostream>
#include <string>
#include<vector>
using namespace std;
// Token tables used by the lexer. In every table `number` is the count of
// leading slots of `str` that are in use, and a token's position inside `str`
// is the index printed for it.

// Reserved words.
struct {
    int number;
    std::string str[10];
} keywords = { 3, { "int", "main", "return" } };

// Operators.
struct {
    int number;
    std::string str[10];
} operators = { 5, { "+", "*", "=", "+=", "*=" } };

// Boundary (delimiter) characters.
struct {
    int number;
    std::string str[10];
} boundaries = { 6, { "(", ")", "{", "}", ",", ";" } };

// Identifiers; starts empty and is filled while the input is scanned.
struct {
    int number;
    std::string str[100];
} identifieres = { 0, {} };

// Unsigned integers; starts empty and is filled while the input is scanned.
struct {
    int number;
    std::string str[100];
} Unsigned_integer = { 0, {} };
// Builds a string containing the characters of `a`, in order.
//
// @param a  Characters to join; an empty vector yields an empty string.
// @return   A string of length a.size() whose i-th character is a[i].
//
// The vector is taken by const reference so it is not copied on every call,
// and the iterator-range constructor replaces the manual copy loop.
std::string changinStr(const std::vector<char>& a) {
    return std::string(a.begin(), a.end());
}
//思路:观察哪些是分隔符,不是分隔符则积累
int main() {
string strin;
bool isstart = 1;
while (isstart) {
getline(cin, strin);
int i = 0;
while (i < strin.length()) {
vector<char> oneStr;
while (strin[i] != ' '&& strin[i] != '(' && strin[i] != ')' && strin[i] != '{' && strin[i] != '}' && strin[i] != ',' && strin[i] != ';'&& strin[i] != '+'&& strin[i] != '*'&&strin[i] != '='&&strin[i] != '#') {
oneStr.push_back(strin[i]);
i++;
}
if (oneStr.size() > 0) {
string temp = changinStr(oneStr);
int k = 0;
if (temp[0] >= '0'&&temp[0] <= '9') {//数字
for (k = 0; k < Unsigned_integer.number; k++) {
if (Unsigned_integer.str[k] == temp) break;
}
if (k == Unsigned_integer.number) {
Unsigned_integer.str[Unsigned_integer.number++] = temp;
cout << "<5,"<< Unsigned_integer.number-1<<">";
}
else cout << "<5," << k << '>';
}
else {
for (k = 0; k < keywords.number; k++) {//关键字
if (keywords.str[k] == temp) {
cout << "<1," << k << '>';
break;
}
}
if (k == keywords.number) {
for (k = 0; k < identifieres.number; k++) {//标识符
if (identifieres.str[k] == temp) break;
}
if (k == identifieres.number) {
identifieres.str[identifieres.number++] = temp;
cout << "<4,"<< identifieres.number-1<<">";
}
else cout << "<4," << k << ">";
}
}
oneStr.clear();
}
if (strin[i] == '(' || strin[i] == ')' || strin[i] == '{' || strin[i] == '}' || strin[i] == ',' || strin[i] == ';') {
string bstr(1, strin[i]);
for (int t = 0; t < boundaries.number; t++) {
if (boundaries.str[t] == bstr) {
cout << "<3," << t << '>';
break;
}
}
}
else if (strin[i] == '+' || strin[i] == '=' || strin[i] == '*') {
if (strin[i] == '=') cout << "<2,2>";
else {
if (strin[i + 1] == '=') {
if (strin[i] == '+') cout << "<2,3>";
else cout << "<2,4>";
i++;
}
else {
if (strin[i] == '+') cout << "<2,0>";
else cout << "<2,1>";
}
}
}
else if (strin[i] == '#') {
isstart = 0;
break;
}
i++;
}
}
cout << endl;//输出
cout << "identifieres:";
for (int i = 0; i < identifieres.number; i++) {
cout << identifieres.str[i];
if (i != identifieres.number - 1) cout << " ";
else cout << endl;
}
cout << "Unsigned_integer:";
for (int i = 0; i < Unsigned_integer.number; i++) {
cout << Unsigned_integer.str[i];
if (i != Unsigned_integer.number - 1) cout << " ";
else cout << endl;
}
system("pause");
}