编译原理的小组作业捏,随手分享一下,防止以后自己再看代码和地铁老人一样(doge)
tips:空格不影响词法分析的判断,部分单词是通过运算符来分隔的
判断常量的DFA图贴在这里了:(判断标识符的DFA就不贴了(比较简单捏))
结果图:
#include <cctype>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
#define MOD 9999971
// Hash values of the reserved words, produced by the polynomial hash computed
// in ckIdentifier ('a' maps to 1, base 29, reduced modulo MOD). For example
// 183 is "if", 439 is "do", 17235 is "int" and 440078 is "char".
long long keyword[] = {
    17235, 349057, 4980080, 4621252, 138277, 183, 1110314,
    4183007, 417166, 105582, 3836897, 335129, 6707534, 137956,
    15579, 3820577, 8388370, 4471584, 4597517, 7809209, 439,
    2873918, 5928846, 6174355, 7517928, 440078
};
// Records whether the token analysed last may be followed by a signed number
// (0 = it may, 1 = it may not).
int last_state;

// Recognises identifiers and keywords.
//
// A valid lexeme starts with a letter or '_' and continues with letters,
// digits or '_'. While scanning, a polynomial hash of the lexeme is built and
// compared against the `keyword` table to tell keywords from identifiers.
//
// On success prints "<lexeme> keyword" or "<lexeme> identifier", sets
// last_state to 1 and returns true. Returns false (printing nothing) for an
// empty lexeme or one containing any other character.
//
// Limitation: only hashes are compared, so an identifier whose hash happens to
// equal a table entry is reported as a keyword.
bool ckIdentifier(std::string s) {
    const std::string::size_type len = s.length();
    if (len == 0) {
        return false;  // nothing to classify; every path must return a value
    }

    long long hash = 0;
    long long weight = 1;  // 29^position, kept reduced so it can never overflow
    for (std::string::size_type i = 0; i < len; ++i) {
        // <cctype> predicates require a value representable as unsigned char.
        const unsigned char ch = static_cast<unsigned char>(s[i]);
        const bool allowed = isalpha(ch) || ch == '_' || (i > 0 && isdigit(ch));
        if (!allowed) {
            return false;
        }
        hash = (hash + (static_cast<long long>(ch) - 'a' + 1) * weight) % MOD;
        weight = weight * 29 % MOD;
    }

    // Walk the whole table; the count is derived from the array itself so the
    // loop bound cannot drift out of sync with the initializer.
    const std::string::size_type keywordCount = sizeof(keyword) / sizeof(keyword[0]);
    bool isKeyword = false;
    for (std::string::size_type k = 0; k < keywordCount; ++k) {
        if (hash == keyword[k]) {
            isKeyword = true;
            break;
        }
    }

    std::cout << s << " " << (isKeyword ? "keyword" : "identifier") << std::endl;
    last_state = 1;
    return true;
}
// Recognises operators by comparing the lexeme against a fixed operator table.
// Prints "<lexeme> operator" and returns true on a match; returns false and
// prints nothing otherwise.
bool ckoperator(string s) {
    static const char* const knownOperators[] = {
        "+", "-", "*", "/", "=", "%",
        "==", "!=", "<", ">", "<=", ">=",
        "!", "&&", "||"
    };
    const int total = sizeof(knownOperators) / sizeof(knownOperators[0]);

    for (int k = 0; k < total; ++k) {
        if (s != knownOperators[k]) {
            continue;
        }
        cout << s << " " << "operator" << endl;
        return true;
    }
    return false;
}
// Recognises delimiter ("border") characters: ( ) { } ; ,
// On a match prints "<lexeme> border", sets last_state to 1 and returns true.
// Any other lexeme returns false without side effects.
bool ckborder(string s) {
    // Every delimiter is exactly one character long.
    if (s.length() != 1) {
        return false;
    }

    switch (s[0]) {
    case '(':
    case ')':
    case '{':
    case '}':
    case ';':
    case ',':
        break;
    default:
        return false;
    }

    cout << s << " " << "border" << endl;
    last_state = 1;
    return true;
}
// Recognises numeric constants with a hand-written DFA: signed and unsigned
// decimal integers, signed and unsigned floats, unsigned octal integers and
// unsigned hexadecimal integers.
//
// DFA states:
//    0  start
//    1  sign read, first digit required
//    2  signed integer                      (accepting)
//    3  unsigned integer, non-zero lead     (accepting)
//    4  signed integer followed by '.', fraction digit required
//    5  unsigned integer followed by '.', fraction digit required
//    6  unsigned float                      (accepting)
//    7  signed float                        (accepting)
//    8  single leading '0'                  (accepting, unsigned integer)
//    9  octal integer                       (accepting)
//   10  "0x"/"0X" read, hex digit required
//   11  hexadecimal integer                 (accepting)
//
// On success prints "<lexeme> <kind>", sets last_state to 1 and returns true.
// Returns false (printing nothing) when the lexeme is not a constant.
bool ckconst(std::string s) {
    const std::string::size_type len = s.length();
    int state = 0;

    for (std::string::size_type index = 0; index < len; ++index) {
        // <cctype> predicates require a value representable as unsigned char.
        const unsigned char ch = static_cast<unsigned char>(s[index]);
        const bool digit = isdigit(ch) != 0;
        const bool octalDigit = (ch >= '0' && ch <= '7');
        const bool hexDigit = isxdigit(ch) != 0;

        switch (state) {
        case 0:
            if (ch == '+' || ch == '-') state = 1;
            else if (ch == '0') state = 8;
            else if (digit) state = 3;
            else return false;
            break;
        case 1:
            if (!digit) return false;
            state = 2;
            break;
        case 2:
            if (ch == '.') state = 4;
            // Reject anything else; without this the scan would never advance.
            else if (!digit) return false;
            break;
        case 3:
            if (ch == '.') state = 5;
            else if (!digit) return false;
            break;
        case 4:
            if (!digit) return false;
            state = 7;
            break;
        case 5:
            if (!digit) return false;
            state = 6;
            break;
        case 6:
        case 7:
            if (!digit) return false;
            break;
        case 8:
            if (ch == '.') state = 5;
            else if (ch == 'x' || ch == 'X') state = 10;
            else if (octalDigit) state = 9;  // octal digits are 0-7 only
            else return false;
            break;
        case 9:
            if (!octalDigit) return false;
            break;
        case 10:
            if (!hexDigit) return false;
            state = 11;
            break;
        case 11:
            if (!hexDigit) return false;
            break;
        default:
            return false;
        }
    }

    // Map the final state to its printed label; non-accepting states fail.
    const char* label = 0;
    switch (state) {
    case 2:  label = " signedinteger";   break;
    case 3:
    case 8:  label = " unsignedinteger"; break;
    case 7:  label = " signedfloat";     break;
    case 6:  label = " unsignedfloat";   break;
    case 9:  label = " octalinteger";    break;
    case 11: label = " hexinteger";      break;
    default: return false;
    }

    last_state = 1;
    std::cout << s << label << std::endl;
    return true;  // every accepting state, octal and hex included, reports success
}
// Runs the four recognisers in priority order (identifier/keyword, operator,
// delimiter, constant). The recogniser that accepts the token prints its
// classification; returns false when none of them accepts it.
static bool classifyToken(const std::string& token) {
    return ckIdentifier(token) || ckoperator(token) || ckborder(token) || ckconst(token);
}

// Reads "in.txt" line by line, splits each line into lexemes and prints the
// classification of each one, or "wrong" for a lexeme no recogniser accepts.
//
// Splitting rules:
//   - spaces and tabs separate lexemes and are otherwise ignored;
//   - letters, digits, '_' and '.' accumulate into the pending lexeme;
//   - an operator/delimiter character first flushes the pending lexeme and is
//     then classified itself, merged with the next character for the
//     two-character operators == != <= >= && ||;
//   - a '+' or '-' directly followed by a digit starts a signed constant
//     unless the text immediately before it (with no space in between) was an
//     identifier, keyword, constant or ')', so "a+1" is not read as "a", "+1";
//   - any other character is accumulated too, so that e.g. "@ab" is reported
//     as wrong.
//
// Returns 1 when the input file cannot be opened, 0 otherwise.
int main()
{
    std::ifstream inFile("in.txt", std::ios::in | std::ios::binary);  // file input
    if (!inFile) {
        std::cerr << "cannot open in.txt" << std::endl;
        return 1;
    }

    static const std::string symbolChars = ",;{}()+-*/%=!<>&|";

    std::string s;
    // std::getline has no fixed line-length limit, so a long line can no
    // longer put the stream into a failed state and end the run early.
    while (std::getline(inFile, s)) {
        // The file is opened in binary mode: drop the '\r' of a CRLF line
        // ending (and anything after an embedded NUL).
        const std::string::size_type cut = s.find_first_of("\r\0", 0, 2);
        if (cut != std::string::npos) {
            s.erase(cut);
        }

        std::string tmp;
        const int len = static_cast<int>(s.length());
        for (int i = 0; i < len; i++) {
            const char cur = s[i];
            // Look-ahead character; '\0' when cur is the last one on the line.
            const char next = (i + 1 < len) ? s[i + 1] : '\0';
            // <cctype> predicates require a value representable as unsigned char.
            const unsigned char ucur = static_cast<unsigned char>(cur);
            const unsigned char unext = static_cast<unsigned char>(next);

            if (cur == ' ' || cur == '\t') {
                // Whitespace ends the pending lexeme; an empty buffer (e.g.
                // consecutive blanks) is not a token and is skipped.
                if (!tmp.empty() && !classifyToken(tmp)) {
                    std::cout << "wrong" << std::endl;
                }
                tmp = "";
                continue;
            }
            else if (cur == '_' || isalpha(ucur) || isdigit(ucur) || cur == '.') {
                tmp += cur;
            }
            else if (symbolChars.find(cur) != std::string::npos) {
                // Assume a signed number may follow; classifying the pending
                // lexeme below flips this to 1 if it is an identifier,
                // keyword or constant.
                last_state = 0;
                if (!tmp.empty() && !classifyToken(tmp)) {
                    std::cout << "wrong" << " " << std::endl;
                }
                tmp = "";
                tmp += cur;

                const bool twoCharOperator =
                    ((cur == '=' || cur == '!' || cur == '<' || cur == '>') && next == '=') ||
                    (cur == '&' && next == '&') ||
                    (cur == '|' && next == '|');
                if (twoCharOperator) {
                    tmp += next;
                    i++;
                }
                else if (last_state == 0 && (i == 0 || s[i - 1] != ')') &&
                         (cur == '+' || cur == '-') && isdigit(unext)) {
                    // Start of a signed constant: keep accumulating its digits.
                    tmp += next;
                    i++;
                    continue;
                }

                if (!classifyToken(tmp)) {
                    std::cout << "wrong" << " " << std::endl;
                }
                tmp = "";
            }
            else {
                // Unknown characters join the lexeme so it is reported as wrong.
                tmp += cur;
            }
        }

        // End of line: classify whatever is still pending.
        if (!tmp.empty() && !classifyToken(tmp)) {
            std::cout << "wrong" << " " << std::endl;
        }
    }
    return 0;
}