【问题描述】通过设计C语言常见单词的正规文法或正规式,而后得到NFA,再确定化得到DFA,根据DFA的转换矩阵或转换图,用C++语言实现词法分析器。
【输入形式】输入一段完整的C语言程序
【输出形式】各类单词的token字
【样例输入】
int main(){
int a = 10;
double b = -20.9;
if(a<=b)
a+=b;
a = 0;
return a;
}
【样例输出】
line1:(type, int)
line1:(keyword, main)
line1:(bracket, ()
line1:(bracket, ))
line1:(bracket, {)
line2:(type, int)
line2:(identify, a)
line2:(OPT, =)
line2:(integer, 10)
line2:(bracket, ;)
line3:(type, double)
line3:(identify, b)
line3:(OPT, =)
line3:(decimal, -20.9)
line3:(bracket, ;)
line4:(keyword, if)
line4:(bracket, ()
line4:(identify, a)
line4:(OPT, <=)
line4:(identify, b)
line4:(bracket, ))
line5:(identify, a)
line5:(OPT, +=)
line5:(identify, b)
line5:(bracket, ;)
line6:(identify, a)
line6:(OPT, =)
line6:(integer, 0)
line6:(bracket, ;)
line7:(keyword, return)
line7:(identify, a)
line7:(bracket, ;)
line8:(bracket, })
【样例说明】需要识别的关键字包括main, return, if, else, do, while, for, scanf, printf, sqrt, abs;type类型包括void, int, double, float, char;运算符(算术、关系、逻辑、位);需要识别的其他单词有标识符, 整数(十进制形式、指数形式),实数(十进制形式、指数形式),字符串(输出类型名为string);过滤注释及空格。
【评分标准】根据设计文档的质量、lex文件的正确性,代码的正确性、代码的时间空间复杂度、识别单词的种类等综合评分
实现代码:
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Words that the scanner reports with token type "keyword".
vector<string> keyword = {
    "scanf", "printf", "if", "else", "for", "while",
    "return", "do", "main", "abs", "sqrt"};

// Words that the scanner reports with token type "type".
vector<string> type = {
    "int", "void", "char", "double", "short", "float"};

// Single-character punctuation the scanner looks up when classifying
// "bracket" tokens (separators, brackets, backslash and the two quotes).
vector<char> bracket = {
    ',', '\\', ';', ':',
    '(', ')', '[', ']', '{', '}',
    '"', '\''};
// One recognised token: where it was found, its category and its text.
struct Node
{
    int line{0};  // line number recorded when the token was stored
    string type;  // category label, e.g. "keyword", "OPT", "integer"
    string word;  // the token text exactly as collected by the scanner
};
// ---- scanner state shared by every function in this file ----
vector<Node> stack;        // tokens in the order they were recognised
int line{1};               // current line number, advanced on each '\n' skipped by jump()
char text[100000] = {0};   // input buffer filled by main(); zero-initialised
char ch{' '};              // character currently under examination (text[i])
int len{0};                // write index main() uses while filling text
int i{0};                  // read index of the scanner inside text
string word;               // characters of the token being assembled
Node temp;                 // scratch node reused by makeword()
void makeword(string s)
{
temp.line = line;
temp.type = s;
temp.word = word;
stack.push_back(temp);
word.clear();
}
// Scans the exponent part of a number and stores the token as "float".
//
// Precondition: `ch` is the exponent marker and `word` already holds the
// mantissa. Grammar accepted here:  marker [+|-] digit {digit}
//
// The first exponent digit may be any of 0-9, so constants such as 1e0 or
// 2e-05 are accepted (the C grammar only requires a digit sequence).
// If no digit follows the marker/sign, an error message is printed and the
// program terminates with exit(-1).
void JudgeE()
{
    // Consume the exponent marker itself.
    word += ch;
    ch = text[++i];

    // Optional sign.
    if (ch == '+' || ch == '-')
    {
        word += ch;
        ch = text[++i];
    }

    // At least one digit is mandatory.
    if (ch < '0' || ch > '9')
    {
        cout << "Error at Line " << line << ": Illegal floating point number \"" << word << "\".\n";
        exit(-1);
    }

    while (ch >= '0' && ch <= '9')
    {
        word += ch;
        ch = text[++i];
    }
    makeword("float");
}
// Skips whitespace starting at the current character `ch`.
//
// Every '\n' skipped increments the global `line` counter. Besides blanks and
// newlines this also skips tab, carriage return, vertical tab and form feed,
// so that tab-indented or CRLF input never reaches the token dispatcher as an
// unrecognised character. On return `ch` is the first non-whitespace
// character (possibly the end-of-input marker).
void jump()
{
    while (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r' || ch == '\v' || ch == '\f')
    {
        if (ch == '\n')
            line++;
        ch = text[++i];
    }
}
// Scans the remainder of a numeric literal and stores it as a token.
//
// On entry `word` may already contain a sign and/or leading digits; scanning
// continues from `ch`. Classification:
//   digits                       -> "integer"
//   digits '.' digits            -> "decimal"
//   either form followed by e/E  -> delegated to JudgeE() (stored as "float")
//
// Both 'e' and 'E' are accepted as exponent markers.
void makenumber()
{
    // Integer part.
    while (ch >= '0' && ch <= '9')
    {
        word += ch;
        ch = text[++i];
    }

    // Optional fractional part.
    bool hasFraction = false;
    if (ch == '.')
    {
        hasFraction = true;
        word += ch;
        ch = text[++i];
        while (ch >= '0' && ch <= '9')
        {
            word += ch;
            ch = text[++i];
        }
    }

    // Optional exponent; JudgeE() stores the token itself.
    if (ch == 'e' || ch == 'E')
        JudgeE();
    else
        makeword(hasFraction ? "decimal" : "integer");
}
void fun()
{
ch = text[i];
jump();
while (ch != '\0' && ch != EOF)
{
jump();
if (ch == '+' || ch == '-')
{
word += ch;
ch = text[++i];
if (ch == '=')
{
word += ch;
makeword("OPT");
ch = text[++i];
}
else
makenumber();
}
if (ch >= '0' && ch <= '9')
{
word += ch;
ch = text[++i];
makenumber();
}
if (isalnum(ch) || ch == '_')
{
while (isalnum(ch) || ch == '_')
{
word += ch;
ch = text[++i];
}
if (find(keyword.begin(), keyword.end(), word) != keyword.end())
makeword("keyword");
else if (find(type.begin(), type.end(), word) != type.end())
makeword("type");
else
makeword("identify");
}
if (ch == '/')
{
char temp = text[++i];
if (temp == '/')
do
{
ch = text[++i];
} while (ch != '\n');
else if (temp == '*')
{
do
{
ch = text[++i];
} while (ch != '/');
ch = text[++i];
}
else if (temp == '=')
{
word += "/=";
makeword("OPT");
ch = text[++i];
}
else
{
word += '/';
makeword("OPT");
ch = text[++i];
}
}
if (ch == '0')
{
word += ch;
ch = text[++i];
makeword("integer");
}
if (ch == '%' || ch == '&')
{
if (isalnum(text[i+1]))
{
word += ch;
word += text[++i];
ch = text[++i];
makeword("typeidentify");
}
}
if (find(bracket.begin(), bracket.end(), ch) != bracket.end())
{
word += ch;
ch = text[++i];
makeword("bracket");
}
if (ch == '*' || ch == '=' || ch == '<' || ch == '>' || ch == '!')
{
word += ch;
if (text[i + 1] == '=')
word += text[++i];
ch = text[++i];
makeword("OPT");
}
}
}
// Reads the whole of standard input into `text`, tokenises it with fun() and
// prints one "lineN:(type, word)" line per token.
//
// Input is read until EOF or a NUL byte. The result of getchar() is kept in
// an int so that EOF is distinguishable from every valid byte, and reading
// stops before the buffer would overflow; the buffer is always
// NUL-terminated before scanning.
int main()
{
    const int capacity = static_cast<int>(sizeof(text)) - 1; // keep room for '\0'

    int c = EOF;
    while (len < capacity && (c = getchar()) != EOF && c != '\0')
        text[len++] = static_cast<char>(c);
    text[len] = '\0';

    // Tell the user if the fixed-size buffer could not hold everything.
    if (len == capacity && getchar() != EOF)
        cerr << "Warning: input truncated to " << capacity << " characters.\n";

    fun();

    for (vector<Node>::const_iterator iter = stack.begin(); iter != stack.end(); ++iter)
        cout << "line" << iter->line << ":(" << iter->type << ", " << iter->word << ")\n";
    return 0;
}